From 9c32761a0addc339dc02f9b071e2a9c9bf33a34c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Thu, 11 Apr 2024 09:30:18 +0200
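Subject: [PATCH] Remove fdsp context

Drop the AVFloatDSPContext function-pointer table and its allocator
(avpriv_float_dsp_alloc). The ATRAC3 and ATRAC3+ decoders now call
vector_fmul, vector_fmul_scalar and vector_fmul_reverse directly instead
of going through a per-decoder fdsp pointer. vector_fmac_scalar,
vector_dmul_scalar and vector_fmul_window are removed from float_dsp.

Also rename ext/at3_standalone/bitstream.c to get_bits.c and update the
Visual Studio project files accordingly.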
---
 Common/Common.vcxproj                         |   2 +-
 Common/Common.vcxproj.filters                 |   2 +-
 ext/at3_standalone/atrac3.c                   |   7 +-
 ext/at3_standalone/atrac3plus.h               |   5 +-
 ext/at3_standalone/atrac3plusdec.c            |   9 +-
 ext/at3_standalone/atrac3plusdsp.c            |  30 ++-
 ext/at3_standalone/float_dsp.c                |  81 +-------
 ext/at3_standalone/float_dsp.h                | 176 ++++--------------
 .../{bitstream.c => get_bits.c}               |   0
 9 files changed, 62 insertions(+), 250 deletions(-)
 rename ext/at3_standalone/{bitstream.c => get_bits.c} (100%)
diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj
index 9335079a50..f49ee5ad16 100644
--- a/Common/Common.vcxproj
+++ b/Common/Common.vcxproj
@@ -610,7 +610,7 @@
     <ClCompile Include="..\ext\at3_standalone\atrac3plus.c" />
     <ClCompile Include="..\ext\at3_standalone\atrac3plusdec.c" />
     <ClCompile Include="..\ext\at3_standalone\atrac3plusdsp.c" />
-    <ClCompile Include="..\ext\at3_standalone\bitstream.c" />
+    <ClCompile Include="..\ext\at3_standalone\get_bits.c" />
     <ClCompile Include="..\ext\at3_standalone\channel_layout.c" />
     <ClCompile Include="..\ext\at3_standalone\compat.c" />
     <ClCompile Include="..\ext\at3_standalone\fft_template.c" />
diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters
index d775f3f937..b91f95181d 100644
--- a/Common/Common.vcxproj.filters
+++ b/Common/Common.vcxproj.filters
@@ -1086,7 +1086,7 @@
     <ClCompile Include="..\ext\at3_standalone\intmath.c">
       <Filter>ext\at3_standalone</Filter>
     </ClCompile>
-    <ClCompile Include="..\ext\at3_standalone\bitstream.c">
+    <ClCompile Include="..\ext\at3_standalone\get_bits.c">
       <Filter>ext\at3_standalone</Filter>
     </ClCompile>
     <ClCompile Include="..\ext\at3_standalone\compat.c">
diff --git a/ext/at3_standalone/atrac3.c b/ext/at3_standalone/atrac3.c
index 630bc59dd3..feda479215 100644
--- a/ext/at3_standalone/atrac3.c
+++ b/ext/at3_standalone/atrac3.c
@@ -106,7 +106,6 @@ typedef struct ATRAC3Context {
 
     AtracGCContext    gainc_ctx;
     FFTContext        mdct_ctx;
-    AVFloatDSPContext *fdsp;
 } ATRAC3Context;
 
 static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE];
@@ -139,7 +138,7 @@ static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band)
     q->mdct_ctx.imdct_calc(&q->mdct_ctx, output, input);
 
     /* Perform windowing on the output. */
-    q->fdsp->vector_fmul(output, output, mdct_window, MDCT_SIZE);
+    vector_fmul(output, output, mdct_window, MDCT_SIZE);
 }
 
 /*
@@ -189,7 +188,6 @@ static int atrac3_decode_close(AVCodecContext *avctx)
 
     av_freep(&q->units);
     av_freep(&q->decoded_bytes_buffer);
-    av_freep(&q->fdsp);
 
     ff_mdct_end(&q->mdct_ctx);
 
@@ -906,10 +904,9 @@ static int atrac3_decode_init(AVCodecContext *avctx)
     }
 
     ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3);
-    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 
     q->units = av_mallocz_array(avctx->channels, sizeof(*q->units));
-    if (!q->units || !q->fdsp) {
+    if (!q->units) {
         atrac3_decode_close(avctx);
         return AVERROR(ENOMEM);
     }
diff --git a/ext/at3_standalone/atrac3plus.h b/ext/at3_standalone/atrac3plus.h
index 8eae592f3b..23f1ff2cee 100644
--- a/ext/at3_standalone/atrac3plus.h
+++ b/ext/at3_standalone/atrac3plus.h
@@ -192,8 +192,7 @@ void ff_atrac3p_init_wave_synth(void);
  * @param[in]   sb        which subband to process
  * @param[out]  out       receives processed data
  */
-void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, AVFloatDSPContext *fdsp,
-                               int ch_num, int sb, float *out);
+void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, int ch_num, int sb, float *out);
 
 /**
  * Perform power compensation aka noise dithering.
@@ -218,7 +217,7 @@ void ff_atrac3p_power_compensation(Atrac3pChanUnitCtx *ctx, int ch_index,
  * @param[in]   wind_id    which MDCT window to apply
  * @param[in]   sb         subband number
  */
-void ff_atrac3p_imdct(AVFloatDSPContext *fdsp, FFTContext *mdct_ctx, float *pIn,
+void ff_atrac3p_imdct(FFTContext *mdct_ctx, float *pIn,
                       float *pOut, int wind_id, int sb);
 
 /**
diff --git a/ext/at3_standalone/atrac3plusdec.c b/ext/at3_standalone/atrac3plusdec.c
index e82bcab8ba..cf8b376409 100644
--- a/ext/at3_standalone/atrac3plusdec.c
+++ b/ext/at3_standalone/atrac3plusdec.c
@@ -47,7 +47,6 @@
 
 typedef struct ATRAC3PContext {
     GetBitContext gb;
-    AVFloatDSPContext *fdsp;
 
     DECLARE_ALIGNED(32, float, samples)[2][ATRAC3P_FRAME_SAMPLES];  ///< quantized MDCT spectrum
     DECLARE_ALIGNED(32, float, mdct_buf)[2][ATRAC3P_FRAME_SAMPLES]; ///< output of the IMDCT
@@ -70,7 +69,6 @@ int atrac3p_decode_close(AVCodecContext *avctx)
     ATRAC3PContext *ctx = avctx->priv_data;
 
     av_freep(&ctx->ch_units);
-    av_freep(&ctx->fdsp);
 
     ff_mdct_end(&ctx->mdct_ctx);
     ff_mdct_end(&ctx->ipqf_dct_ctx);
@@ -171,9 +169,8 @@ int atrac3p_decode_init(AVCodecContext *avctx)
     ctx->my_channel_layout = avctx->channel_layout;
 
     ctx->ch_units = av_mallocz_array(ctx->num_channel_blocks, sizeof(*ctx->ch_units));
-    ctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 
-    if (!ctx->ch_units || !ctx->fdsp) {
+    if (!ctx->ch_units) {
         atrac3p_decode_close(avctx);
         return AVERROR(ENOMEM);
     }
@@ -268,7 +265,7 @@ static void reconstruct_frame(ATRAC3PContext *ctx, Atrac3pChanUnitCtx *ch_unit,
     for (ch = 0; ch < num_channels; ch++) {
         for (sb = 0; sb < ch_unit->num_subbands; sb++) {
             /* inverse transform and windowing */
-            ff_atrac3p_imdct(ctx->fdsp, &ctx->mdct_ctx,
+            ff_atrac3p_imdct(&ctx->mdct_ctx,
                              &ctx->samples[ch][sb * ATRAC3P_SUBBAND_SAMPLES],
                              &ctx->mdct_buf[ch][sb * ATRAC3P_SUBBAND_SAMPLES],
                              (ch_unit->channels[ch].wnd_shape_prev[sb] << 1) +
@@ -302,7 +299,7 @@ static void reconstruct_frame(ATRAC3PContext *ctx, Atrac3pChanUnitCtx *ch_unit,
             for (sb = 0; sb < ch_unit->num_subbands; sb++)
                 if (ch_unit->channels[ch].tones_info[sb].num_wavs ||
                     ch_unit->channels[ch].tones_info_prev[sb].num_wavs) {
-                    ff_atrac3p_generate_tones(ch_unit, ctx->fdsp, ch, sb,
+                    ff_atrac3p_generate_tones(ch_unit, ch, sb,
                                               &ctx->time_buf[ch][sb * 128]);
                 }
         }
diff --git a/ext/at3_standalone/atrac3plusdsp.c b/ext/at3_standalone/atrac3plusdsp.c
index 399bbdc17e..d98775b94a 100644
--- a/ext/at3_standalone/atrac3plusdsp.c
+++ b/ext/at3_standalone/atrac3plusdsp.c
@@ -117,7 +117,6 @@ void ff_atrac3p_init_wave_synth(void)
  *  @param[in]    synth_param   ptr to common synthesis parameters
  *  @param[in]    waves_info    parameters for each sine wave
  *  @param[in]    envelope      envelope data for all waves in a group
- *  @param[in]    fdsp          ptr to floating-point DSP context
  *  @param[in]    invert_phase  flag indicating 180° phase shift
  *  @param[in]    reg_offset    region offset for trimming envelope data
  *  @param[out]   out           receives sythesized data
@@ -125,7 +124,6 @@ void ff_atrac3p_init_wave_synth(void)
 static void waves_synth(Atrac3pWaveSynthParams *synth_param,
                         Atrac3pWavesData *waves_info,
                         Atrac3pWaveEnvelope *envelope,
-                        AVFloatDSPContext *fdsp,
                         int invert_phase, int reg_offset, float *out)
 {
     int i, wn, inc, pos;
@@ -151,7 +149,7 @@ static void waves_synth(Atrac3pWaveSynthParams *synth_param,
 
     /* invert phase if requested */
     if (invert_phase)
-        fdsp->vector_fmul_scalar(out, out, -1.0f, 128);
+        vector_fmul_scalar(out, out, -1.0f, 128);
 
     /* fade in with steep Hann window if requested */
     if (envelope->has_start_point) {
@@ -181,8 +179,7 @@ static void waves_synth(Atrac3pWaveSynthParams *synth_param,
     }
 }
 
-void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, AVFloatDSPContext *fdsp,
-                               int ch_num, int sb, float *out)
+void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, int ch_num, int sb, float *out)
 {
     DECLARE_ALIGNED(32, float, wavreg1)[128] = { 0 };
     DECLARE_ALIGNED(32, float, wavreg2)[128] = { 0 };
@@ -223,24 +220,24 @@ void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, AVFloatDSPContext *f
     /* synthesize waves for both overlapping regions */
     if (tones_now->num_wavs && reg1_env_nonzero)
         waves_synth(ch_unit->waves_info_prev, tones_now, &tones_now->curr_env,
-                    fdsp, ch_unit->waves_info_prev->invert_phase[sb] & ch_num,
+                    ch_unit->waves_info_prev->invert_phase[sb] & ch_num,
                     128, wavreg1);
 
     if (tones_next->num_wavs && reg2_env_nonzero)
-        waves_synth(ch_unit->waves_info, tones_next, &tones_next->curr_env, fdsp,
+        waves_synth(ch_unit->waves_info, tones_next, &tones_next->curr_env,
                     ch_unit->waves_info->invert_phase[sb] & ch_num, 0, wavreg2);
 
     /* Hann windowing for non-faded wave signals */
     if (tones_now->num_wavs && tones_next->num_wavs &&
         reg1_env_nonzero && reg2_env_nonzero) {
-        fdsp->vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
-        fdsp->vector_fmul(wavreg2, wavreg2,  hann_window,      128);
+        vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
+        vector_fmul(wavreg2, wavreg2,  hann_window,      128);
     } else {
         if (tones_now->num_wavs && !tones_now->curr_env.has_stop_point)
-            fdsp->vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
+            vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
 
         if (tones_next->num_wavs && !tones_next->curr_env.has_start_point)
-            fdsp->vector_fmul(wavreg2, wavreg2, hann_window, 128);
+            vector_fmul(wavreg2, wavreg2, hann_window, 128);
     }
 
     /* Overlap and add to residual */
@@ -461,7 +458,7 @@ void ff_atrac3p_power_compensation(Atrac3pChanUnitCtx *ctx, int ch_index,
     }
 }
 
-void ff_atrac3p_imdct(AVFloatDSPContext *fdsp, FFTContext *mdct_ctx, float *pIn,
+void ff_atrac3p_imdct(FFTContext *mdct_ctx, float *pIn,
                       float *pOut, int wind_id, int sb)
 {
     int i;
@@ -480,16 +477,15 @@ void ff_atrac3p_imdct(AVFloatDSPContext *fdsp, FFTContext *mdct_ctx, float *pIn,
      *   Both regions are 32 samples long. */
     if (wind_id & 2) { /* 1st half: steep window */
         memset(pOut, 0, sizeof(float) * 32);
-        fdsp->vector_fmul(&pOut[32], &pOut[32], ff_sine_64, 64);
+        vector_fmul(&pOut[32], &pOut[32], ff_sine_64, 64);
     } else /* 1st half: simple sine window */
-        fdsp->vector_fmul(pOut, pOut, ff_sine_128, ATRAC3P_MDCT_SIZE / 2);
+        vector_fmul(pOut, pOut, ff_sine_128, ATRAC3P_MDCT_SIZE / 2);
 
     if (wind_id & 1) { /* 2nd half: steep window */
-        fdsp->vector_fmul_reverse(&pOut[160], &pOut[160], ff_sine_64, 64);
+        vector_fmul_reverse(&pOut[160], &pOut[160], ff_sine_64, 64);
         memset(&pOut[224], 0, sizeof(float) * 32);
     } else /* 2nd half: simple sine window */
-        fdsp->vector_fmul_reverse(&pOut[128], &pOut[128], ff_sine_128,
-                                  ATRAC3P_MDCT_SIZE / 2);
+        vector_fmul_reverse(&pOut[128], &pOut[128], ff_sine_128, ATRAC3P_MDCT_SIZE / 2);
 }
 
 /* lookup table for fast modulo 23 op required for cyclic buffers of the IPQF */
diff --git a/ext/at3_standalone/float_dsp.c b/ext/at3_standalone/float_dsp.c
index b1e59ec837..91e78e03e7 100644
--- a/ext/at3_standalone/float_dsp.c
+++ b/ext/at3_standalone/float_dsp.c
@@ -23,100 +23,27 @@
 #include "float_dsp.h"
 #include "mem.h"
 
-static void vector_fmul_c(float *dst, const float *src0, const float *src1,
-                          int len)
-{
+void vector_fmul(float *dst, const float *src0, const float *src1, int len) {
     int i;
     for (i = 0; i < len; i++)
         dst[i] = src0[i] * src1[i];
 }
 
-static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
-                                 int len)
-{
-    int i;
-    for (i = 0; i < len; i++)
-        dst[i] += src[i] * mul;
-}
-
-static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
-                                 int len)
-{
+void vector_fmul_scalar(float *dst, const float *src, float mul, int len) {
     int i;
     for (i = 0; i < len; i++)
         dst[i] = src[i] * mul;
 }
 
-static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
-                                 int len)
-{
+void vector_fmul_add(float *dst, const float *src0, const float *src1, const float *src2, int len) {
     int i;
-    for (i = 0; i < len; i++)
-        dst[i] = src[i] * mul;
-}
-
-static void vector_fmul_window_c(float *dst, const float *src0,
-                                 const float *src1, const float *win, int len)
-{
-    int i, j;
-
-    dst  += len;
-    win  += len;
-    src0 += len;
-
-    for (i = -len, j = len - 1; i < 0; i++, j--) {
-        float s0 = src0[i];
-        float s1 = src1[j];
-        float wi = win[i];
-        float wj = win[j];
-        dst[i] = s0 * wj - s1 * wi;
-        dst[j] = s0 * wi + s1 * wj;
-    }
-}
-
-static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
-                              const float *src2, int len){
-    int i;
-
     for (i = 0; i < len; i++)
         dst[i] = src0[i] * src1[i] + src2[i];
 }
 
-static void vector_fmul_reverse_c(float *dst, const float *src0,
-                                  const float *src1, int len)
-{
+void vector_fmul_reverse(float *dst, const float *src0, const float *src1, int len) {
     int i;
-
     src1 += len-1;
     for (i = 0; i < len; i++)
         dst[i] = src0[i] * src1[-i];
 }
-
-AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
-{
-    AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
-    if (!fdsp)
-        return NULL;
-
-    fdsp->vector_fmul = vector_fmul_c;
-    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
-    fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
-    fdsp->vector_dmul_scalar = vector_dmul_scalar_c;
-    fdsp->vector_fmul_window = vector_fmul_window_c;
-    fdsp->vector_fmul_add = vector_fmul_add_c;
-    fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
-
-	/*
-    if (ARCH_AARCH64)
-        ff_float_dsp_init_aarch64(fdsp);
-    if (ARCH_ARM)
-        ff_float_dsp_init_arm(fdsp);
-    if (ARCH_PPC)
-        ff_float_dsp_init_ppc(fdsp, bit_exact);
-    if (ARCH_X86)
-        ff_float_dsp_init_x86(fdsp);
-    if (ARCH_MIPS)
-        ff_float_dsp_init_mips(fdsp);
-		*/
-    return fdsp;
-}
diff --git a/ext/at3_standalone/float_dsp.h b/ext/at3_standalone/float_dsp.h
index b2b433a6f7..9379c7a7d1 100644
--- a/ext/at3_standalone/float_dsp.h
+++ b/ext/at3_standalone/float_dsp.h
@@ -16,149 +16,45 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVUTIL_FLOAT_DSP_H
-#define AVUTIL_FLOAT_DSP_H
+#pragma once
 
-typedef struct AVFloatDSPContext {
-    /**
-     * Calculate the entry wise product of two vectors of floats and store the result in
-     * a vector of floats.
-     *
-     * @param dst  output vector
-     *             constraints: 32-byte aligned
-     * @param src0 first input vector
-     *             constraints: 32-byte aligned
-     * @param src1 second input vector
-     *             constraints: 32-byte aligned
-     * @param len  number of elements in the input
-     *             constraints: multiple of 16
-     */
-    void (*vector_fmul)(float *dst, const float *src0, const float *src1,
-                        int len);
-
-    /**
-     * Multiply a vector of floats by a scalar float and add to
-     * destination vector.  Source and destination vectors must
-     * overlap exactly or not at all.
-     *
-     * @param dst result vector
-     *            constraints: 32-byte aligned
-     * @param src input vector
-     *            constraints: 32-byte aligned
-     * @param mul scalar value
-     * @param len length of vector
-     *            constraints: multiple of 16
-     */
-    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
-                               int len);
-
-    /**
-     * Multiply a vector of floats by a scalar float.  Source and
-     * destination vectors must overlap exactly or not at all.
-     *
-     * @param dst result vector
-     *            constraints: 16-byte aligned
-     * @param src input vector
-     *            constraints: 16-byte aligned
-     * @param mul scalar value
-     * @param len length of vector
-     *            constraints: multiple of 4
-     */
-    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
-                               int len);
-
-    /**
-     * Multiply a vector of double by a scalar double.  Source and
-     * destination vectors must overlap exactly or not at all.
-     *
-     * @param dst result vector
-     *            constraints: 32-byte aligned
-     * @param src input vector
-     *            constraints: 32-byte aligned
-     * @param mul scalar value
-     * @param len length of vector
-     *            constraints: multiple of 8
-     */
-    void (*vector_dmul_scalar)(double *dst, const double *src, double mul,
-                               int len);
-
-    /**
-     * Overlap/add with window function.
-     * Used primarily by MDCT-based audio codecs.
-     * Source and destination vectors must overlap exactly or not at all.
-     *
-     * @param dst  result vector
-     *             constraints: 16-byte aligned
-     * @param src0 first source vector
-     *             constraints: 16-byte aligned
-     * @param src1 second source vector
-     *             constraints: 16-byte aligned
-     * @param win  half-window vector
-     *             constraints: 16-byte aligned
-     * @param len  length of vector
-     *             constraints: multiple of 4
-     */
-    void (*vector_fmul_window)(float *dst, const float *src0,
-                               const float *src1, const float *win, int len);
-
-    /**
-     * Calculate the entry wise product of two vectors of floats, add a third vector of
-     * floats and store the result in a vector of floats.
-     *
-     * @param dst  output vector
-     *             constraints: 32-byte aligned
-     * @param src0 first input vector
-     *             constraints: 32-byte aligned
-     * @param src1 second input vector
-     *             constraints: 32-byte aligned
-     * @param src2 third input vector
-     *             constraints: 32-byte aligned
-     * @param len  number of elements in the input
-     *             constraints: multiple of 16
-     */
-    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1,
-                            const float *src2, int len);
-
-    /**
-     * Calculate the entry wise product of two vectors of floats, and store the result
-     * in a vector of floats. The second vector of floats is iterated over
-     * in reverse order.
-     *
-     * @param dst  output vector
-     *             constraints: 32-byte aligned
-     * @param src0 first input vector
-     *             constraints: 32-byte aligned
-     * @param src1 second input vector
-     *             constraints: 32-byte aligned
-     * @param len  number of elements in the input
-     *             constraints: multiple of 16
-     */
-    void (*vector_fmul_reverse)(float *dst, const float *src0,
-                                const float *src1, int len);
-} AVFloatDSPContext;
+void vector_fmul(float *dst, const float *src0, const float *src1, int len);
 
 /**
- * Return the scalar product of two vectors.
- *
- * @param v1  first input vector
- * @param v2  first input vector
- * @param len number of elements
- *
- * @return sum of elementwise products
- */
-float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
-
-void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
-void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
-void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
-void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
-void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
+ * Multiply a vector of floats by a scalar float.  Source and
+ * destination vectors must overlap exactly or not at all.
+ */
+void vector_fmul_scalar(float *dst, const float *src, float mul, int len);
 
 /**
- * Allocate a float DSP context.
- *
- * @param strict  setting to non-zero avoids using functions which may not be IEEE-754 compliant
- */
-AVFloatDSPContext *avpriv_float_dsp_alloc(int strict);
+ * Calculate the element-wise product of two vectors of floats, add a third
+ * vector of floats and store the result in a vector of floats.
+ *
+ * @param dst  output vector
+ *             constraints: 32-byte aligned
+ * @param src0 first input vector
+ *             constraints: 32-byte aligned
+ * @param src1 second input vector
+ *             constraints: 32-byte aligned
+ * @param src2 third input vector
+ *             constraints: 32-byte aligned
+ * @param len  number of elements in the input
+ *             constraints: multiple of 16
+ */
+void vector_fmul_add(float *dst, const float *src0, const float *src1, const float *src2, int len);
 
-#endif /* AVUTIL_FLOAT_DSP_H */
+/**
+ * Calculate the element-wise product of two vectors of floats, and store
+ * the result in a vector of floats. The second vector of floats is
+ * iterated over in reverse order.
+ *
+ * @param dst  output vector
+ *             constraints: 32-byte aligned
+ * @param src0 first input vector
+ *             constraints: 32-byte aligned
+ * @param src1 second input vector
+ *             constraints: 32-byte aligned
+ * @param len  number of elements in the input
+ *             constraints: multiple of 16
+ */
+void vector_fmul_reverse(float *dst, const float *src0, const float *src1, int len);
diff --git a/ext/at3_standalone/bitstream.c b/ext/at3_standalone/get_bits.c
similarity index 100%
rename from ext/at3_standalone/bitstream.c
rename to ext/at3_standalone/get_bits.c