mirror of
https://github.com/libretro/libretro-common.git
synced 2024-06-20 21:28:28 -04:00
Updates
This commit is contained in:
parent
5815d110e9
commit
4c71603e94
|
@ -48,7 +48,7 @@ void audio_mix_volume_C(float *out, const float *in, float vol, size_t samples)
|
|||
#ifdef __SSE2__
|
||||
void audio_mix_volume_SSE2(float *out, const float *in, float vol, size_t samples)
|
||||
{
|
||||
size_t i;
|
||||
size_t i, remaining_samples;
|
||||
__m128 volume = _mm_set1_ps(vol);
|
||||
|
||||
for (i = 0; i + 16 <= samples; i += 16, out += 16, in += 16)
|
||||
|
@ -71,7 +71,10 @@ void audio_mix_volume_SSE2(float *out, const float *in, float vol, size_t sample
|
|||
_mm_storeu_ps(out + 4 * j, _mm_add_ps(input[j], additive[j]));
|
||||
}
|
||||
|
||||
audio_mix_volume_C(out, in, vol, samples - i);
|
||||
remaining_samples = samples - i;
|
||||
|
||||
for (i = 0; i < remaining_samples; i++)
|
||||
out[i] += in[i] * vol;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -161,87 +161,137 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
|||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
|
||||
while (frames)
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
while (frames)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
__m256 delta, sum_l, sum_r;
|
||||
float *delta_table = NULL;
|
||||
float *phase_table = NULL;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
|
||||
phase_table = resamp->phase_table + phase * taps;
|
||||
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
phase_table = resamp->phase_table + phase * taps * 2;
|
||||
delta_table = phase_table + taps;
|
||||
delta = _mm256_set1_ps((float)
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
|
||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||
float *delta_table = phase_table + taps;
|
||||
__m256 delta = _mm256_set1_ps((float)
|
||||
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
||||
}
|
||||
|
||||
sum_l = _mm256_setzero_ps();
|
||||
sum_r = _mm256_setzero_ps();
|
||||
__m256 sum_l = _mm256_setzero_ps();
|
||||
__m256 sum_r = _mm256_setzero_ps();
|
||||
|
||||
for (i = 0; i < taps; i += 8)
|
||||
{
|
||||
__m256 sinc;
|
||||
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
||||
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
||||
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
for (i = 0; i < taps; i += 8)
|
||||
{
|
||||
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
||||
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
||||
__m256 deltas = _mm256_load_ps(delta_table + i);
|
||||
sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i),
|
||||
__m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i),
|
||||
_mm256_mul_ps(deltas, delta));
|
||||
}
|
||||
else
|
||||
{
|
||||
sinc = _mm256_load_ps((const float*)phase_table + i);
|
||||
|
||||
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
||||
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
||||
}
|
||||
|
||||
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
||||
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
||||
/* hadd on AVX is weird, and acts on low-lanes
|
||||
* and high-lanes separately. */
|
||||
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
||||
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
||||
res_l = _mm256_hadd_ps(res_l, res_l);
|
||||
res_r = _mm256_hadd_ps(res_r, res_r);
|
||||
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
||||
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
||||
|
||||
/* This is optimized to mov %xmmN, [mem].
|
||||
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
||||
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
||||
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (frames)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
/* hadd on AVX is weird, and acts on low-lanes
|
||||
* and high-lanes separately. */
|
||||
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
||||
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
||||
res_l = _mm256_hadd_ps(res_l, res_l);
|
||||
res_r = _mm256_hadd_ps(res_r, res_r);
|
||||
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
||||
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
__m256 delta;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps;
|
||||
|
||||
/* This is optimized to mov %xmmN, [mem].
|
||||
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
||||
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
||||
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
||||
__m256 sum_l = _mm256_setzero_ps();
|
||||
__m256 sum_r = _mm256_setzero_ps();
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
for (i = 0; i < taps; i += 8)
|
||||
{
|
||||
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
||||
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
||||
__m256 sinc = _mm256_load_ps((const float*)phase_table + i);
|
||||
|
||||
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
||||
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
||||
}
|
||||
|
||||
/* hadd on AVX is weird, and acts on low-lanes
|
||||
* and high-lanes separately. */
|
||||
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
||||
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
||||
res_l = _mm256_hadd_ps(res_l, res_l);
|
||||
res_r = _mm256_hadd_ps(res_r, res_r);
|
||||
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
||||
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
||||
|
||||
/* This is optimized to mov %xmmN, [mem].
|
||||
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
||||
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
||||
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -261,99 +311,157 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
|||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
|
||||
while (frames)
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
while (frames)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
__m128 sum, sum_l, sum_r, delta;
|
||||
float *phase_table = NULL;
|
||||
float *delta_table = NULL;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
phase_table = resamp->phase_table + phase * taps * 2;
|
||||
delta_table = phase_table + taps;
|
||||
delta = _mm_set1_ps((float)
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
__m128 sum;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||
float *delta_table = phase_table + taps;
|
||||
__m128 delta = _mm_set1_ps((float)
|
||||
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
||||
}
|
||||
else
|
||||
{
|
||||
phase_table = resamp->phase_table + phase * taps;
|
||||
}
|
||||
|
||||
sum_l = _mm_setzero_ps();
|
||||
sum_r = _mm_setzero_ps();
|
||||
__m128 sum_l = _mm_setzero_ps();
|
||||
__m128 sum_r = _mm_setzero_ps();
|
||||
|
||||
for (i = 0; i < taps; i += 4)
|
||||
{
|
||||
__m128 deltas, _sinc;
|
||||
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
||||
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
||||
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
for (i = 0; i < taps; i += 4)
|
||||
{
|
||||
deltas = _mm_load_ps(delta_table + i);
|
||||
_sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i),
|
||||
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
||||
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
||||
__m128 deltas = _mm_load_ps(delta_table + i);
|
||||
__m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i),
|
||||
_mm_mul_ps(deltas, delta));
|
||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
||||
}
|
||||
else
|
||||
{
|
||||
_sinc = _mm_load_ps((const float*)phase_table + i);
|
||||
}
|
||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
||||
|
||||
/* Them annoying shuffles.
|
||||
* sum_l = { l3, l2, l1, l0 }
|
||||
* sum_r = { r3, r2, r1, r0 }
|
||||
*/
|
||||
|
||||
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
|
||||
_MM_SHUFFLE(1, 0, 1, 0)),
|
||||
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
|
||||
* sum = { R1, R0, L1, L0 }
|
||||
*/
|
||||
|
||||
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
|
||||
|
||||
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
|
||||
* sum = { X, R, X, L }
|
||||
*/
|
||||
|
||||
/* Store L */
|
||||
_mm_store_ss(output + 0, sum);
|
||||
|
||||
/* movehl { X, R, X, L } == { X, R, X, R } */
|
||||
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (frames)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
/* Them annoying shuffles.
|
||||
* sum_l = { l3, l2, l1, l0 }
|
||||
* sum_r = { r3, r2, r1, r0 }
|
||||
*/
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
__m128 sum;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps;
|
||||
|
||||
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
|
||||
_MM_SHUFFLE(1, 0, 1, 0)),
|
||||
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
__m128 sum_l = _mm_setzero_ps();
|
||||
__m128 sum_r = _mm_setzero_ps();
|
||||
|
||||
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
|
||||
* sum = { R1, R0, L1, L0 }
|
||||
*/
|
||||
for (i = 0; i < taps; i += 4)
|
||||
{
|
||||
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
||||
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
||||
__m128 _sinc = _mm_load_ps((const float*)phase_table + i);
|
||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
||||
}
|
||||
|
||||
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
|
||||
/* Them annoying shuffles.
|
||||
* sum_l = { l3, l2, l1, l0 }
|
||||
* sum_r = { r3, r2, r1, r0 }
|
||||
*/
|
||||
|
||||
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
|
||||
* sum = { X, R, X, L }
|
||||
*/
|
||||
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
|
||||
_MM_SHUFFLE(1, 0, 1, 0)),
|
||||
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
/* Store L */
|
||||
_mm_store_ss(output + 0, sum);
|
||||
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
|
||||
* sum = { R1, R0, L1, L0 }
|
||||
*/
|
||||
|
||||
/* movehl { X, R, X, L } == { X, R, X, R } */
|
||||
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
|
||||
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
|
||||
* sum = { X, R, X, L }
|
||||
*/
|
||||
|
||||
/* Store L */
|
||||
_mm_store_ss(output + 0, sum);
|
||||
|
||||
/* movehl { X, R, X, L } == { X, R, X, R } */
|
||||
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -372,69 +480,108 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
|
|||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
|
||||
while (frames)
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
while (frames)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
float delta = 0.0f;
|
||||
float sum_l = 0.0f;
|
||||
float sum_r = 0.0f;
|
||||
float *phase_table = NULL;
|
||||
float *delta_table = NULL;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
phase_table = resamp->phase_table + phase * taps * 2;
|
||||
delta_table = phase_table + taps;
|
||||
delta = (float)
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
float sum_l = 0.0f;
|
||||
float sum_r = 0.0f;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||
float *delta_table = phase_table + taps;
|
||||
float delta = (float)
|
||||
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod;
|
||||
}
|
||||
else
|
||||
{
|
||||
phase_table = resamp->phase_table + phase * taps;
|
||||
|
||||
for (i = 0; i < taps; i++)
|
||||
{
|
||||
float sinc_val = phase_table[i] + delta_table[i] * delta;
|
||||
|
||||
sum_l += buffer_l[i] * sinc_val;
|
||||
sum_r += buffer_r[i] * sinc_val;
|
||||
}
|
||||
|
||||
output[0] = sum_l;
|
||||
output[1] = sum_r;
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
|
||||
for (i = 0; i < taps; i++)
|
||||
{
|
||||
float sinc_val = phase_table[i];
|
||||
|
||||
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||
sinc_val = sinc_val + delta_table[i] * delta;
|
||||
|
||||
sum_l += buffer_l[i] * sinc_val;
|
||||
sum_r += buffer_r[i] * sinc_val;
|
||||
}
|
||||
|
||||
output[0] = sum_l;
|
||||
output[1] = sum_r;
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (frames)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
float sum_l = 0.0f;
|
||||
float sum_r = 0.0f;
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps;
|
||||
|
||||
for (i = 0; i < taps; i++)
|
||||
{
|
||||
float sinc_val = phase_table[i];
|
||||
|
||||
sum_l += buffer_l[i] * sinc_val;
|
||||
sum_r += buffer_r[i] * sinc_val;
|
||||
}
|
||||
|
||||
output[0] = sum_l;
|
||||
output[1] = sum_r;
|
||||
|
||||
output += 2;
|
||||
out_frames++;
|
||||
resamp->time += ratio;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
data->output_frames = out_frames;
|
||||
|
|
|
@ -29,6 +29,8 @@ void genode_free_secondary_stack(void *stack);
|
|||
#include "ppc.c"
|
||||
#elif defined(__aarch64__)
|
||||
#include "aarch64.c"
|
||||
#elif defined(PSP)
|
||||
#include "psp1.c"
|
||||
#elif defined VITA
|
||||
#include "scefiber.c"
|
||||
#elif defined(__ARM_EABI__) || defined(__arm__)
|
||||
|
|
45
libco/psp1.c
Normal file
45
libco/psp1.c
Normal file
|
@ -0,0 +1,45 @@
|
|||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <pspthreadman.h>
|
||||
|
||||
/* Since cothread_t is a void pointer it must contain an address. We can't return a reference to a local variable
|
||||
* because it would go out of scope, so we create a static variable instead so we can return a reference to it.
|
||||
*/
|
||||
static SceUID active_thread_id = 0;
|
||||
|
||||
cothread_t co_active()
|
||||
{
|
||||
active_thread_id = sceKernelGetThreadId();
|
||||
return &active_thread_id;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entrypoint)(void))
|
||||
{
|
||||
/* Similar scenario as with active_thread_id except there will only be one active_thread_id while there could be many
|
||||
* new threads each with their own handle, so we create them on the heap instead and delete them manually when they're
|
||||
* no longer needed in co_delete().
|
||||
*/
|
||||
cothread_t handle = malloc(sizeof(cothread_t));
|
||||
|
||||
/* SceKernelThreadEntry has a different signature than entrypoint, but in practice this seems to work */
|
||||
SceUID new_thread_id = sceKernelCreateThread("cothread", (SceKernelThreadEntry)entrypoint, 0x12, size, 0, NULL);
|
||||
sceKernelStartThread(new_thread_id, 0, NULL);
|
||||
|
||||
*(SceUID *)handle = new_thread_id;
|
||||
return handle;
|
||||
}
|
||||
|
||||
void co_delete(cothread_t handle)
|
||||
{
|
||||
sceKernelTerminateDeleteThread(*(SceUID *)handle);
|
||||
free(handle);
|
||||
}
|
||||
|
||||
void co_switch(cothread_t handle)
|
||||
{
|
||||
sceKernelWakeupThread(*(SceUID *)handle);
|
||||
/* Sleep the currently active thread so the new thread can start */
|
||||
sceKernelSleepThread();
|
||||
}
|
Loading…
Reference in a new issue