From 9c32761a0addc339dc02f9b071e2a9c9bf33a34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 11 Apr 2024 09:30:18 +0200 Subject: [PATCH] Remove fdsp context --- Common/Common.vcxproj | 2 +- Common/Common.vcxproj.filters | 2 +- ext/at3_standalone/atrac3.c | 7 +- ext/at3_standalone/atrac3plus.h | 5 +- ext/at3_standalone/atrac3plusdec.c | 9 +- ext/at3_standalone/atrac3plusdsp.c | 30 ++- ext/at3_standalone/float_dsp.c | 81 +------- ext/at3_standalone/float_dsp.h | 176 ++++-------------- .../{bitstream.c => get_bits.c} | 0 9 files changed, 62 insertions(+), 250 deletions(-) rename ext/at3_standalone/{bitstream.c => get_bits.c} (100%) diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index 9335079a50..f49ee5ad16 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -610,7 +610,7 @@ - + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index d775f3f937..b91f95181d 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -1086,7 +1086,7 @@ ext\at3_standalone - + ext\at3_standalone diff --git a/ext/at3_standalone/atrac3.c b/ext/at3_standalone/atrac3.c index 630bc59dd3..feda479215 100644 --- a/ext/at3_standalone/atrac3.c +++ b/ext/at3_standalone/atrac3.c @@ -106,7 +106,6 @@ typedef struct ATRAC3Context { AtracGCContext gainc_ctx; FFTContext mdct_ctx; - AVFloatDSPContext *fdsp; } ATRAC3Context; static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE]; @@ -139,7 +138,7 @@ static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band) q->mdct_ctx.imdct_calc(&q->mdct_ctx, output, input); /* Perform windowing on the output. */ - q->fdsp->vector_fmul(output, output, mdct_window, MDCT_SIZE); + vector_fmul(output, output, mdct_window, MDCT_SIZE); } /* @@ -189,7 +188,6 @@ static int atrac3_decode_close(AVCodecContext *avctx) av_freep(&q->units); av_freep(&q->decoded_bytes_buffer); - av_freep(&q->fdsp); ff_mdct_end(&q->mdct_ctx); @@ -906,10 +904,9 @@ static int atrac3_decode_init(AVCodecContext *avctx) } ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3); - q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); q->units = av_mallocz_array(avctx->channels, sizeof(*q->units)); - if (!q->units || !q->fdsp) { + if (!q->units) { atrac3_decode_close(avctx); return AVERROR(ENOMEM); } diff --git a/ext/at3_standalone/atrac3plus.h b/ext/at3_standalone/atrac3plus.h index 8eae592f3b..23f1ff2cee 100644 --- a/ext/at3_standalone/atrac3plus.h +++ b/ext/at3_standalone/atrac3plus.h @@ -192,8 +192,7 @@ void ff_atrac3p_init_wave_synth(void); * @param[in] sb which subband to process * @param[out] out receives processed data */ -void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, AVFloatDSPContext *fdsp, - int ch_num, int sb, float *out); +void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, int ch_num, int sb, float *out); /** * Perform power compensation aka noise dithering. @@ -218,7 +217,7 @@ void ff_atrac3p_power_compensation(Atrac3pChanUnitCtx *ctx, int ch_index, * @param[in] wind_id which MDCT window to apply * @param[in] sb subband number */ -void ff_atrac3p_imdct(AVFloatDSPContext *fdsp, FFTContext *mdct_ctx, float *pIn, +void ff_atrac3p_imdct(FFTContext *mdct_ctx, float *pIn, float *pOut, int wind_id, int sb); /** diff --git a/ext/at3_standalone/atrac3plusdec.c b/ext/at3_standalone/atrac3plusdec.c index e82bcab8ba..cf8b376409 100644 --- a/ext/at3_standalone/atrac3plusdec.c +++ b/ext/at3_standalone/atrac3plusdec.c @@ -47,7 +47,6 @@ typedef struct ATRAC3PContext { GetBitContext gb; - AVFloatDSPContext *fdsp; DECLARE_ALIGNED(32, float, samples)[2][ATRAC3P_FRAME_SAMPLES]; ///< quantized MDCT spectrum DECLARE_ALIGNED(32, float, mdct_buf)[2][ATRAC3P_FRAME_SAMPLES]; ///< output of the IMDCT @@ -70,7 +69,6 @@ int atrac3p_decode_close(AVCodecContext *avctx) ATRAC3PContext *ctx = avctx->priv_data; av_freep(&ctx->ch_units); - av_freep(&ctx->fdsp); ff_mdct_end(&ctx->mdct_ctx); ff_mdct_end(&ctx->ipqf_dct_ctx); @@ -171,9 +169,8 @@ int atrac3p_decode_init(AVCodecContext *avctx) ctx->my_channel_layout = avctx->channel_layout; ctx->ch_units = av_mallocz_array(ctx->num_channel_blocks, sizeof(*ctx->ch_units)); - ctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); - if (!ctx->ch_units || !ctx->fdsp) { + if (!ctx->ch_units) { atrac3p_decode_close(avctx); return AVERROR(ENOMEM); } @@ -268,7 +265,7 @@ static void reconstruct_frame(ATRAC3PContext *ctx, Atrac3pChanUnitCtx *ch_unit, for (ch = 0; ch < num_channels; ch++) { for (sb = 0; sb < ch_unit->num_subbands; sb++) { /* inverse transform and windowing */ - ff_atrac3p_imdct(ctx->fdsp, &ctx->mdct_ctx, + ff_atrac3p_imdct(&ctx->mdct_ctx, &ctx->samples[ch][sb * ATRAC3P_SUBBAND_SAMPLES], &ctx->mdct_buf[ch][sb * ATRAC3P_SUBBAND_SAMPLES], (ch_unit->channels[ch].wnd_shape_prev[sb] << 1) + @@ -302,7 +299,7 @@ static void reconstruct_frame(ATRAC3PContext *ctx, Atrac3pChanUnitCtx *ch_unit, for (sb = 0; sb < ch_unit->num_subbands; sb++) if (ch_unit->channels[ch].tones_info[sb].num_wavs || ch_unit->channels[ch].tones_info_prev[sb].num_wavs) { - ff_atrac3p_generate_tones(ch_unit, ctx->fdsp, ch, sb, + ff_atrac3p_generate_tones(ch_unit, ch, sb, &ctx->time_buf[ch][sb * 128]); } } diff --git a/ext/at3_standalone/atrac3plusdsp.c b/ext/at3_standalone/atrac3plusdsp.c index 399bbdc17e..d98775b94a 100644 --- a/ext/at3_standalone/atrac3plusdsp.c +++ b/ext/at3_standalone/atrac3plusdsp.c @@ -117,7 +117,6 @@ void ff_atrac3p_init_wave_synth(void) * @param[in] synth_param ptr to common synthesis parameters * @param[in] waves_info parameters for each sine wave * @param[in] envelope envelope data for all waves in a group - * @param[in] fdsp ptr to floating-point DSP context * @param[in] invert_phase flag indicating 180° phase shift * @param[in] reg_offset region offset for trimming envelope data * @param[out] out receives sythesized data @@ -125,7 +124,6 @@ void ff_atrac3p_init_wave_synth(void) static void waves_synth(Atrac3pWaveSynthParams *synth_param, Atrac3pWavesData *waves_info, Atrac3pWaveEnvelope *envelope, - AVFloatDSPContext *fdsp, int invert_phase, int reg_offset, float *out) { int i, wn, inc, pos; @@ -151,7 +149,7 @@ static void waves_synth(Atrac3pWaveSynthParams *synth_param, /* invert phase if requested */ if (invert_phase) - fdsp->vector_fmul_scalar(out, out, -1.0f, 128); + vector_fmul_scalar(out, out, -1.0f, 128); /* fade in with steep Hann window if requested */ if (envelope->has_start_point) { @@ -181,8 +179,7 @@ static void waves_synth(Atrac3pWaveSynthParams *synth_param, } } -void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, AVFloatDSPContext *fdsp, - int ch_num, int sb, float *out) +void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, int ch_num, int sb, float *out) { DECLARE_ALIGNED(32, float, wavreg1)[128] = { 0 }; DECLARE_ALIGNED(32, float, wavreg2)[128] = { 0 }; @@ -223,24 +220,24 @@ void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, AVFloatDSPContext *f /* synthesize waves for both overlapping regions */ if (tones_now->num_wavs && reg1_env_nonzero) waves_synth(ch_unit->waves_info_prev, tones_now, &tones_now->curr_env, - fdsp, ch_unit->waves_info_prev->invert_phase[sb] & ch_num, + ch_unit->waves_info_prev->invert_phase[sb] & ch_num, 128, wavreg1); if (tones_next->num_wavs && reg2_env_nonzero) - waves_synth(ch_unit->waves_info, tones_next, &tones_next->curr_env, fdsp, + waves_synth(ch_unit->waves_info, tones_next, &tones_next->curr_env, ch_unit->waves_info->invert_phase[sb] & ch_num, 0, wavreg2); /* Hann windowing for non-faded wave signals */ if (tones_now->num_wavs && tones_next->num_wavs && reg1_env_nonzero && reg2_env_nonzero) { - fdsp->vector_fmul(wavreg1, wavreg1, &hann_window[128], 128); - fdsp->vector_fmul(wavreg2, wavreg2, hann_window, 128); + vector_fmul(wavreg1, wavreg1, &hann_window[128], 128); + vector_fmul(wavreg2, wavreg2, hann_window, 128); } else { if (tones_now->num_wavs && !tones_now->curr_env.has_stop_point) - fdsp->vector_fmul(wavreg1, wavreg1, &hann_window[128], 128); + vector_fmul(wavreg1, wavreg1, &hann_window[128], 128); if (tones_next->num_wavs && !tones_next->curr_env.has_start_point) - fdsp->vector_fmul(wavreg2, wavreg2, hann_window, 128); + vector_fmul(wavreg2, wavreg2, hann_window, 128); } /* Overlap and add to residual */ @@ -461,7 +458,7 @@ void ff_atrac3p_power_compensation(Atrac3pChanUnitCtx *ctx, int ch_index, } } -void ff_atrac3p_imdct(AVFloatDSPContext *fdsp, FFTContext *mdct_ctx, float *pIn, +void ff_atrac3p_imdct(FFTContext *mdct_ctx, float *pIn, float *pOut, int wind_id, int sb) { int i; @@ -480,16 +477,15 @@ void ff_atrac3p_imdct(AVFloatDSPContext *fdsp, FFTContext *mdct_ctx, float *pIn, * Both regions are 32 samples long. */ if (wind_id & 2) { /* 1st half: steep window */ memset(pOut, 0, sizeof(float) * 32); - fdsp->vector_fmul(&pOut[32], &pOut[32], ff_sine_64, 64); + vector_fmul(&pOut[32], &pOut[32], ff_sine_64, 64); } else /* 1st half: simple sine window */ - fdsp->vector_fmul(pOut, pOut, ff_sine_128, ATRAC3P_MDCT_SIZE / 2); + vector_fmul(pOut, pOut, ff_sine_128, ATRAC3P_MDCT_SIZE / 2); if (wind_id & 1) { /* 2nd half: steep window */ - fdsp->vector_fmul_reverse(&pOut[160], &pOut[160], ff_sine_64, 64); + vector_fmul_reverse(&pOut[160], &pOut[160], ff_sine_64, 64); memset(&pOut[224], 0, sizeof(float) * 32); } else /* 2nd half: simple sine window */ - fdsp->vector_fmul_reverse(&pOut[128], &pOut[128], ff_sine_128, - ATRAC3P_MDCT_SIZE / 2); + vector_fmul_reverse(&pOut[128], &pOut[128], ff_sine_128, ATRAC3P_MDCT_SIZE / 2); } /* lookup table for fast modulo 23 op required for cyclic buffers of the IPQF */ diff --git a/ext/at3_standalone/float_dsp.c b/ext/at3_standalone/float_dsp.c index b1e59ec837..91e78e03e7 100644 --- a/ext/at3_standalone/float_dsp.c +++ b/ext/at3_standalone/float_dsp.c @@ -23,100 +23,27 @@ #include "float_dsp.h" #include "mem.h" -static void vector_fmul_c(float *dst, const float *src0, const float *src1, - int len) -{ +void vector_fmul(float *dst, const float *src0, const float *src1, int len) { int i; for (i = 0; i < len; i++) dst[i] = src0[i] * src1[i]; } -static void vector_fmac_scalar_c(float *dst, const float *src, float mul, - int len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] += src[i] * mul; -} - -static void vector_fmul_scalar_c(float *dst, const float *src, float mul, - int len) -{ +void vector_fmul_scalar(float *dst, const float *src, float mul, int len) { int i; for (i = 0; i < len; i++) dst[i] = src[i] * mul; } -static void vector_dmul_scalar_c(double *dst, const double *src, double mul, - int len) -{ +void vector_fmul_add(float *dst, const float *src0, const float *src1, const float *src2, int len) { int i; - for (i = 0; i < len; i++) - dst[i] = src[i] * mul; -} - -static void vector_fmul_window_c(float *dst, const float *src0, - const float *src1, const float *win, int len) -{ - int i, j; - - dst += len; - win += len; - src0 += len; - - for (i = -len, j = len - 1; i < 0; i++, j--) { - float s0 = src0[i]; - float s1 = src1[j]; - float wi = win[i]; - float wj = win[j]; - dst[i] = s0 * wj - s1 * wi; - dst[j] = s0 * wi + s1 * wj; - } -} - -static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, - const float *src2, int len){ - int i; - for (i = 0; i < len; i++) dst[i] = src0[i] * src1[i] + src2[i]; } -static void vector_fmul_reverse_c(float *dst, const float *src0, - const float *src1, int len) -{ +void vector_fmul_reverse(float *dst, const float *src0, const float *src1, int len) { int i; - src1 += len-1; for (i = 0; i < len; i++) dst[i] = src0[i] * src1[-i]; } - -AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact) -{ - AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext)); - if (!fdsp) - return NULL; - - fdsp->vector_fmul = vector_fmul_c; - fdsp->vector_fmac_scalar = vector_fmac_scalar_c; - fdsp->vector_fmul_scalar = vector_fmul_scalar_c; - fdsp->vector_dmul_scalar = vector_dmul_scalar_c; - fdsp->vector_fmul_window = vector_fmul_window_c; - fdsp->vector_fmul_add = vector_fmul_add_c; - fdsp->vector_fmul_reverse = vector_fmul_reverse_c; - - /* - if (ARCH_AARCH64) - ff_float_dsp_init_aarch64(fdsp); - if (ARCH_ARM) - ff_float_dsp_init_arm(fdsp); - if (ARCH_PPC) - ff_float_dsp_init_ppc(fdsp, bit_exact); - if (ARCH_X86) - ff_float_dsp_init_x86(fdsp); - if (ARCH_MIPS) - ff_float_dsp_init_mips(fdsp); - */ - return fdsp; -} diff --git a/ext/at3_standalone/float_dsp.h b/ext/at3_standalone/float_dsp.h index b2b433a6f7..9379c7a7d1 100644 --- a/ext/at3_standalone/float_dsp.h +++ b/ext/at3_standalone/float_dsp.h @@ -16,149 +16,45 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVUTIL_FLOAT_DSP_H -#define AVUTIL_FLOAT_DSP_H +#pragma once -typedef struct AVFloatDSPContext { - /** - * Calculate the entry wise product of two vectors of floats and store the result in - * a vector of floats. - * - * @param dst output vector - * constraints: 32-byte aligned - * @param src0 first input vector - * constraints: 32-byte aligned - * @param src1 second input vector - * constraints: 32-byte aligned - * @param len number of elements in the input - * constraints: multiple of 16 - */ - void (*vector_fmul)(float *dst, const float *src0, const float *src1, - int len); - - /** - * Multiply a vector of floats by a scalar float and add to - * destination vector. Source and destination vectors must - * overlap exactly or not at all. - * - * @param dst result vector - * constraints: 32-byte aligned - * @param src input vector - * constraints: 32-byte aligned - * @param mul scalar value - * @param len length of vector - * constraints: multiple of 16 - */ - void (*vector_fmac_scalar)(float *dst, const float *src, float mul, - int len); - - /** - * Multiply a vector of floats by a scalar float. Source and - * destination vectors must overlap exactly or not at all. - * - * @param dst result vector - * constraints: 16-byte aligned - * @param src input vector - * constraints: 16-byte aligned - * @param mul scalar value - * @param len length of vector - * constraints: multiple of 4 - */ - void (*vector_fmul_scalar)(float *dst, const float *src, float mul, - int len); - - /** - * Multiply a vector of double by a scalar double. Source and - * destination vectors must overlap exactly or not at all. - * - * @param dst result vector - * constraints: 32-byte aligned - * @param src input vector - * constraints: 32-byte aligned - * @param mul scalar value - * @param len length of vector - * constraints: multiple of 8 - */ - void (*vector_dmul_scalar)(double *dst, const double *src, double mul, - int len); - - /** - * Overlap/add with window function. - * Used primarily by MDCT-based audio codecs. - * Source and destination vectors must overlap exactly or not at all. - * - * @param dst result vector - * constraints: 16-byte aligned - * @param src0 first source vector - * constraints: 16-byte aligned - * @param src1 second source vector - * constraints: 16-byte aligned - * @param win half-window vector - * constraints: 16-byte aligned - * @param len length of vector - * constraints: multiple of 4 - */ - void (*vector_fmul_window)(float *dst, const float *src0, - const float *src1, const float *win, int len); - - /** - * Calculate the entry wise product of two vectors of floats, add a third vector of - * floats and store the result in a vector of floats. - * - * @param dst output vector - * constraints: 32-byte aligned - * @param src0 first input vector - * constraints: 32-byte aligned - * @param src1 second input vector - * constraints: 32-byte aligned - * @param src2 third input vector - * constraints: 32-byte aligned - * @param len number of elements in the input - * constraints: multiple of 16 - */ - void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, - const float *src2, int len); - - /** - * Calculate the entry wise product of two vectors of floats, and store the result - * in a vector of floats. The second vector of floats is iterated over - * in reverse order. - * - * @param dst output vector - * constraints: 32-byte aligned - * @param src0 first input vector - * constraints: 32-byte aligned - * @param src1 second input vector - * constraints: 32-byte aligned - * @param len number of elements in the input - * constraints: multiple of 16 - */ - void (*vector_fmul_reverse)(float *dst, const float *src0, - const float *src1, int len); -} AVFloatDSPContext; +void vector_fmul(float *dst, const float *src0, const float *src1, int len); /** - * Return the scalar product of two vectors. - * - * @param v1 first input vector - * @param v2 first input vector - * @param len number of elements - * - * @return sum of elementwise products - */ -float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); - -void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp); -void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); -void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); -void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); -void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp); + * Multiply a vector of floats by a scalar float. Source and + * destination vectors must overlap exactly or not at all. + */ +void vector_fmul_scalar(float *dst, const float *src, float mul, int len); /** - * Allocate a float DSP context. - * - * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant - */ -AVFloatDSPContext *avpriv_float_dsp_alloc(int strict); + * Calculate the entry wise product of two vectors of floats, add a third vector of + * floats and store the result in a vector of floats. + * + * @param dst output vector + * constraints: 32-byte aligned + * @param src0 first input vector + * constraints: 32-byte aligned + * @param src1 second input vector + * constraints: 32-byte aligned + * @param src2 third input vector + * constraints: 32-byte aligned + * @param len number of elements in the input + * constraints: multiple of 16 + */ +void vector_fmul_add(float *dst, const float *src0, const float *src1, const float *src2, int len); -#endif /* AVUTIL_FLOAT_DSP_H */ +/** + * Calculate the entry wise product of two vectors of floats, and store the result + * in a vector of floats. The second vector of floats is iterated over + * in reverse order. + * + * @param dst output vector + * constraints: 32-byte aligned + * @param src0 first input vector + * constraints: 32-byte aligned + * @param src1 second input vector + * constraints: 32-byte aligned + * @param len number of elements in the input + * constraints: multiple of 16 + */ +void vector_fmul_reverse(float *dst, const float *src0, const float *src1, int len); diff --git a/ext/at3_standalone/bitstream.c b/ext/at3_standalone/get_bits.c similarity index 100% rename from ext/at3_standalone/bitstream.c rename to ext/at3_standalone/get_bits.c