Switch from shift to multiplier for volume in StereoResampler

This commit is contained in:
Henrik Rydgård 2025-02-11 15:45:10 -06:00
parent 1a41a36093
commit 25bedc44b8
2 changed files with 66 additions and 52 deletions

View file

@ -25,11 +25,11 @@
#include "Common/Common.h"
#include "Common/CommonFuncs.h"
// NOTE(review): every constant below appears twice, once in a `const` / `static const int8_t`
// form and once in a `constexpr int` form. This looks like the before and after sides of a
// rendered diff shown without +/- markers; only one set can exist in a real translation unit.
const int PSP_MODEL_FAT = 0;
const int PSP_MODEL_SLIM = 1;
const int PSP_DEFAULT_FIRMWARE = 660;
static const int8_t VOLUME_OFF = 0;
static const int8_t VOLUME_FULL = 10;
// constexpr forms of the same values. VOLUME_OFF and VOLUME_FULL are plain int here
// rather than int8_t.
constexpr int PSP_MODEL_FAT = 0;
constexpr int PSP_MODEL_SLIM = 1;
constexpr int PSP_DEFAULT_FIRMWARE = 660;
constexpr int VOLUME_OFF = 0;
constexpr int VOLUME_FULL = 10;
struct ConfigTouchPos {
float x;

View file

@ -41,6 +41,7 @@
#include "Common/System/System.h"
#include "Common/Log.h"
#include "Common/Math/SIMDHeaders.h"
#include "Common/Math/CrossSIMD.h"
#include "Common/TimeUtil.h"
#include "Core/Config.h"
#include "Core/ConfigValues.h"
@ -90,61 +91,57 @@ void StereoResampler::UpdateBufferSize() {
}
}
// ClampBufferToS16: converts `size` s32 samples from `in` to s16 samples in `out`,
// saturating to the s16 range, optionally applying a volume adjustment first.
//
// NOTE(review): two versions of this function are interleaved below (one taking
// `s8 volShift`, one taking `int factor`), apparently the before and after sides of a
// diff rendered without +/- markers. Read the lines with that in mind.
template<bool useShift>
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size, s8 volShift) {
// factor is a 0.12 fixed-point multiplier: the product is shifted right by 12 bits,
// so 4096 (1 << 12) corresponds to unity gain.
template<bool multiply>
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size, int factor) {
if (multiply) {
// Let's SIMD later. Unfortunately for s16 operations, SSE2 is very different and odd
// so CrossSIMD won't be very useful.
for (size_t i = 0; i < size; i++) {
// Scale, drop the 12 fractional bits, then saturate to s16.
// NOTE(review): the multiply happens before the shift; confirm the input range
// is small enough that in[i] * factor cannot overflow a 32-bit int.
out[i] = clamp_s16((in[i] * factor) >> 12);
}
} else {
#ifdef _M_SSE
// Size will always be 16-byte aligned as the hwBlockSize is.
while (size >= 8) {
__m128i in1 = _mm_loadu_si128((__m128i *)in);
__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
__m128i packed = _mm_packs_epi32(in1, in2);
if (useShift) {
packed = _mm_srai_epi16(packed, volShift);
// Size will always be 16-byte aligned as the hwBlockSize is.
while (size >= 8) {
// Processes 8 samples per iteration: two unaligned loads of four s32 each.
__m128i in1 = _mm_loadu_si128((__m128i *)in);
__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
__m128i packed = _mm_packs_epi32(in1, in2); // pack with signed saturation, perfect.
_mm_storeu_si128((__m128i *)out, packed);
out += 8;
in += 8;
size -= 8;
}
_mm_storeu_si128((__m128i *)out, packed);
out += 8;
in += 8;
size -= 8;
}
#elif PPSSPP_ARCH(ARM_NEON)
// Dynamic shifts can only be left, but it's signed - negate to shift right.
int16x4_t signedVolShift = vdup_n_s16(-volShift);
while (size >= 8) {
int32x4_t in1 = vld1q_s32(in);
int32x4_t in2 = vld1q_s32(in + 4);
int16x4_t packed1 = vqmovn_s32(in1);
int16x4_t packed2 = vqmovn_s32(in2);
if (useShift) {
packed1 = vshl_s16(packed1, signedVolShift);
packed2 = vshl_s16(packed2, signedVolShift);
// Dynamic shifts can only be left, but it's signed - negate to shift right.
// NOTE(review): `volShift` is declared only by the `s8 volShift` signature above; the
// `int factor` signature does not declare it, and the loop that follows never reads
// signedVolShift. Confirm whether this declaration should remain in the factor-based version.
int16x4_t signedVolShift = vdup_n_s16(-volShift);
while (size >= 8) {
// Processes 8 samples per iteration: two loads of four s32, each narrowed with vqmovn_s32.
int32x4_t in1 = vld1q_s32(in);
int32x4_t in2 = vld1q_s32(in + 4);
int16x4_t packed1 = vqmovn_s32(in1);
int16x4_t packed2 = vqmovn_s32(in2);
vst1_s16(out, packed1);
vst1_s16(out + 4, packed2);
out += 8;
in += 8;
size -= 8;
}
vst1_s16(out, packed1);
vst1_s16(out + 4, packed2);
out += 8;
in += 8;
size -= 8;
}
#endif
// This does the remainder if SIMD was used, otherwise it does it all.
for (size_t i = 0; i < size; i++) {
out[i] = clamp_s16(useShift ? (in[i] >> volShift) : in[i]);
// This does the remainder if SIMD was used, otherwise it does it all.
for (size_t i = 0; i < size; i++) {
out[i] = clamp_s16(in[i]);
}
}
}
// ClampBufferToS16WithVolume: writes `size` s16 samples to `out` from the s32 samples in
// `in`, choosing between a clamp-only copy, silence, or a volume-adjusted clamp.
//
// NOTE(review): two versions are interleaved below, apparently the before and after sides
// of a diff rendered without +/- markers. One reads the volume from g_Config itself; the
// other receives it as the `volume` parameter and compares it against 4096 and 0.
inline void ClampBufferToS16WithVolume(s16 *out, const s32 *in, size_t size) {
int volume = g_Config.iGlobalVolume;
// When the FPS limit is not NORMAL or fast-forward is active, iAltSpeedVolume replaces
// the global volume unless it is -1.
if (PSP_CoreParameter().fpsLimit != FPSLimit::NORMAL || PSP_CoreParameter().fastForward) {
if (g_Config.iAltSpeedVolume != -1) {
volume = g_Config.iAltSpeedVolume;
}
}
if (volume >= VOLUME_FULL) {
inline void ClampBufferToS16WithVolume(s16 *out, const s32 *in, size_t size, int volume) {
// The last parameter to ClampBufferToS16 is no longer a shift, now it's a 12-bit multiplier.
if (volume >= 4096) {
// Clamp only: the <false> instantiation is called with 0 as the last argument.
ClampBufferToS16<false>(out, in, size, 0);
} else if (volume <= VOLUME_OFF) {
} else if (volume <= 0) {
// Zero or negative volume: fill the output with zero bytes instead of converting the input.
memset(out, 0, size * sizeof(s16));
} else {
ClampBufferToS16<true>(out, in, size, VOLUME_FULL - (s8)volume);
// In-between volume: pass the value through as the last argument of the <true> instantiation.
ClampBufferToS16<true>(out, in, size, volume);
}
}
@ -277,13 +274,30 @@ void StereoResampler::PushSamples(const s32 *samples, unsigned int numSamples) {
return;
}
int vol = g_Config.iGlobalVolume;
if (PSP_CoreParameter().fpsLimit != FPSLimit::NORMAL || PSP_CoreParameter().fastForward) {
if (g_Config.iAltSpeedVolume != -1) {
vol = g_Config.iAltSpeedVolume;
}
}
vol = std::clamp(vol, 0, VOLUME_FULL);
// 12-bit fixed-point volume (4096 = full). For now it is still derived by shifting, so it only takes power-of-two steps and is no finer-grained than the old shift.
int volume;
if (vol != 0) {
volume = 4096 >> (VOLUME_FULL - vol);
} else {
volume = 0;
}
// Check if we need to roll over to the start of the buffer during the copy.
unsigned int indexW_left_samples = m_maxBufsize * 2 - (indexW & INDEX_MASK);
if (numSamples * 2 > indexW_left_samples) {
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, indexW_left_samples);
ClampBufferToS16WithVolume(&m_buffer[0], samples + indexW_left_samples, numSamples * 2 - indexW_left_samples);
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, indexW_left_samples, volume);
ClampBufferToS16WithVolume(&m_buffer[0], samples + indexW_left_samples, numSamples * 2 - indexW_left_samples, volume);
} else {
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, numSamples * 2);
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, numSamples * 2, volume);
}
m_indexW += numSamples * 2;