Replace outAudioQueue with Dolphin's resampler.

2025-04-02 11:01:50 -04:00 · 2015-01-11 15:13:43 +01:00 · 2015-01-11 15:13:43 +01:00 · e312d6b5fd
commit e312d6b5fd
parent 1b055fd07e
8 changed files with 343 additions and 104 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1276,6 +1276,8 @@ add_library(${CoreLibName} ${CoreLinkType}
 	Core/HW/MemoryStick.h
 	Core/HW/SasAudio.cpp
 	Core/HW/SasAudio.h
+	Core/HW/StereoResampler.cpp
+	Core/HW/StereoResampler.h
 	Core/Host.cpp
 	Core/Host.h
 	Core/Loaders.cpp
--- a/Core/Core.vcxproj
+++ b/Core/Core.vcxproj
@ -274,6 +274,7 @@
    <ClCompile Include="HW\SasAudio.cpp" />
    <ClCompile Include="HW\AsyncIOManager.cpp" />
    <ClCompile Include="HW\SimpleAudioDec.cpp" />
+    <ClCompile Include="HW\StereoResampler.cpp" />
    <ClCompile Include="Loaders.cpp" />
    <ClCompile Include="MemMap.cpp" />
    <ClCompile Include="MemmapFunctions.cpp" />
@ -512,6 +513,7 @@
    <ClInclude Include="HW\MemoryStick.h" />
    <ClInclude Include="HW\AsyncIOManager.h" />
    <ClInclude Include="HW\SimpleAudioDec.h" />
+    <ClInclude Include="HW\StereoResampler.h" />
    <ClInclude Include="Loaders.h" />
    <ClInclude Include="MemMap.h" />
    <ClInclude Include="MIPS\ARM\ArmAsm.h">
--- a/Core/Core.vcxproj.filters
+++ b/Core/Core.vcxproj.filters
@ -300,6 +300,9 @@
    <ClCompile Include="HW\MediaEngine.cpp">
      <Filter>HW</Filter>
    </ClCompile>
+    <ClCompile Include="HW\StereoResampler.cpp">
+      <Filter>HW</Filter>
+    </ClCompile>
    <ClCompile Include="Util\PPGeDraw.cpp">
      <Filter>Util</Filter>
    </ClCompile>
@ -775,6 +778,9 @@
    <ClInclude Include="HW\MediaEngine.h">
      <Filter>HW</Filter>
    </ClInclude>
+    <ClInclude Include="HW\StereoResampler.h">
+      <Filter>HW</Filter>
+    </ClInclude>
    <ClInclude Include="Util\PPGeDraw.h">
      <Filter>Util</Filter>
    </ClInclude>
--- a/Core/HLE/__sceAudio.cpp
+++ b/Core/HLE/__sceAudio.cpp
@ -35,7 +35,9 @@
 #include "Core/HLE/sceAudio.h"
 #include "Core/HLE/sceKernel.h"
 #include "Core/HLE/sceKernelThread.h"
+#include "Core/HW/StereoResampler.h"

+StereoResampler resampler;

 // Should be used to lock anything related to the outAudioQueue.
 // atomic locks are used on the lock. TODO: make this lock-free
@ -67,14 +69,6 @@ static s32 *mixBuffer;
 static int chanQueueMaxSizeFactor;
 static int chanQueueMinSizeFactor;

-// TODO: Need to replace this with something lockless. Mutexes in the audio pipeline
-// is bad mojo.
-FixedSizeQueue<s16, 512 * 16> outAudioQueue;
-
-bool __gainAudioQueueLock();
-void __releaseAcquiredLock();
-void __blockForAudioQueueLock();
-
 static inline s16 adjustvolume(s16 sample, int vol) {
 #ifdef ARM
 	register int r;
@ -181,9 +175,7 @@ void __AudioInit() {
 	mixBuffer = new s32[hwBlockSize * 2];
 	memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32));

-	__blockForAudioQueueLock();
-	outAudioQueue.clear();
-	__releaseAcquiredLock();
+	resampler.Clear();
 	CoreTiming::RegisterMHzChangeCallback(&__AudioCPUMHzChange);
 }

@ -199,16 +191,14 @@ void __AudioDoState(PointerWrap &p) {

 	p.Do(mixFrequency);

-	{	
-		//block until a lock is achieved. Not a good idea at all, but
-		//can't think of a better one...
-		__blockForAudioQueueLock();
-
+	if (s >= 2) {
+		resampler.DoState(p);
+	} else {
+		// Only to preserve the previous file format. Might cause a slight audio glitch on upgrades?
+		FixedSizeQueue<s16, 512 * 16> outAudioQueue;
 		outAudioQueue.DoState(p);

-		//release the atomic lock
-		__releaseAcquiredLock();
-		
+		resampler.Clear();
 	}

 	int chanCount = ARRAY_SIZE(chans);
@ -358,28 +348,6 @@ void __AudioSetOutputFrequency(int freq) {
 	mixFrequency = freq;
 }

-inline void ClampBufferToS16(s16 *out, s32 *in, size_t size) {
-#ifdef _M_SSE
-	// Size will always be 16-byte aligned as the hwBlockSize is.
-	while (size >= 8) {
-		__m128i in1 = _mm_loadu_si128((__m128i *)in);
-		__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
-		__m128i packed = _mm_packs_epi32(in1, in2);
-		_mm_storeu_si128((__m128i *)out, packed);
-		out += 8;
-		in += 8;
-		size -= 8;
-	}
-	for (size_t i = 0; i < size; i++) {
-		out[i] = clamp_s16(in[i]);
-	}
-#else
-	for (size_t i = 0; i < size; i++) {
-		out[i] = clamp_s16(in[i]);
-	}
-#endif
-}
-
 // Mix samples from the various audio channels into a single sample queue.
 // This single sample queue is where __AudioMix should read from. If the sample queue is full, we should
 // just sleep the main emulator thread a little.
@ -433,35 +401,17 @@ void __AudioUpdate() {
 	}

 	if (g_Config.bEnableSound) {
-
-		__blockForAudioQueueLock();
-		/*
-		if (!__gainAudioQueueLock()){
-			return;
-		}
-		*/
-
-		if (outAudioQueue.room() >= hwBlockSize * 2) {
-			s16 *buf1 = 0, *buf2 = 0;
-			size_t sz1, sz2;
-			outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2);
-			ClampBufferToS16(buf1, mixBuffer, sz1);
-			if (buf2) {
-				ClampBufferToS16(buf2, mixBuffer + sz1, sz2);
-			}
-		} else {
-			// This happens quite a lot. There's still something slightly off
-			// about the amount of audio we produce.
-		}
-		//release the atomic lock
-		__releaseAcquiredLock();
+		resampler.PushSamples(mixBuffer, hwBlockSize);
 	}
 }

 // numFrames is number of stereo frames.
 // This is called from *outside* the emulator thread.
-int __AudioMix(short *outstereo, int numFrames)
-{
+int __AudioMix(short *outstereo, int numFrames, int sampleRate) {
+	resampler.Mix(outstereo, numFrames, false, sampleRate);
+	return numFrames;
+
+	/*
 	// TODO: if mixFrequency != the actual output frequency, resample!
 	int underrun = -1;
 	s16 sampleL = 0;
@ -476,6 +426,7 @@ int __AudioMix(short *outstereo, int numFrames)
 			 return 0;
 		}
 		
+		resampler.Mix(outstereo, numFrames);
 		outAudioQueue.popPointers(numFrames * 2, &buf1, &sz1, &buf2, &sz2);

 		memcpy(outstereo, buf1, sz1 * sizeof(s16));
@ -496,40 +447,5 @@ int __AudioMix(short *outstereo, int numFrames)
 		VERBOSE_LOG(SCEAUDIO, "Audio out buffer UNDERRUN at %i of %i", underrun, numFrames);
 	}
 	return underrun >= 0 ? underrun : numFrames;
-}
-
-
-
-/*returns whether the lock was successfully gained or not.
-i.e - whether the lock belongs to you 
-*/
-inline bool __gainAudioQueueLock(){
-	if (g_Config.bAtomicAudioLocks){
-		/*if the previous state was 0, that means the lock was "unlocked". So,
-		we return !0, which is true thanks to C's int to bool conversion
-
-		One the other hand, if it was locked, then the lock would return 1.
-		so, !1 = 0 = false.
-		*/		
-		return atomicLock_.test_and_set() == 0;
-	} else {
-		mutex_.lock();
-		return true;
-	}
-};
-
-inline void __releaseAcquiredLock(){
-	if (g_Config.bAtomicAudioLocks){
-		atomicLock_.clear();
-	} else {
-		mutex_.unlock();
-	}
-}
-
-inline void __blockForAudioQueueLock(){
-	if (g_Config.bAtomicAudioLocks){
-		while ((atomicLock_.test_and_set() == 0)){ }
-	} else {
-		mutex_.lock();
-	}
+	*/
 }
--- a/Core/HLE/__sceAudio.h
+++ b/Core/HLE/__sceAudio.h
@ -32,4 +32,4 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking);
 void __AudioWakeThreads(AudioChannel &chan, int result, int step);
 void __AudioWakeThreads(AudioChannel &chan, int result);

-int __AudioMix(short *outstereo, int numSamples);
+int __AudioMix(short *outstereo, int numSamples, int sampleRate);
--- a/Core/HW/StereoResampler.cpp
+++ b/Core/HW/StereoResampler.cpp
@ -0,0 +1,202 @@
+// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Adapted from Dolphin.
+
+#include <string.h>
+
+#include "base/logging.h"
+#include "Common/ChunkFile.h"
+#include "Common/MathUtil.h"
+#include "Common/Atomics.h"
+#include "Core/HW/StereoResampler.h"
+#include "Globals.h"
+
+#ifdef _M_SSE
+#include <emmintrin.h>
+#endif
+
+// Narrows `size` 32-bit samples from `in` to 16-bit samples in `out`.
+// The scalar path clamps through clamp_s16; the SSE2 path relies on _mm_packs_epi32,
+// which packs with signed saturation.
+inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) {
+	size_t remaining = size;
+#ifdef _M_SSE
+	// Eight samples per iteration. Unaligned loads/stores are used, so neither buffer
+	// needs any particular alignment.
+	for (; remaining >= 8; remaining -= 8, in += 8, out += 8) {
+		const __m128i lower = _mm_loadu_si128((__m128i *)in);
+		const __m128i upper = _mm_loadu_si128((__m128i *)(in + 4));
+		_mm_storeu_si128((__m128i *)out, _mm_packs_epi32(lower, upper));
+	}
+#endif
+	// Without SSE this handles the whole buffer; with SSE only the tail (fewer than 8 samples).
+	for (; remaining > 0; --remaining) {
+		*out++ = clamp_s16(*in++);
+	}
+}
+
+// Silences the FIFO by zeroing its sample storage.
+// The read/write indices are deliberately left untouched: Mix() (sound stream thread) is the
+// only writer of m_indexR, so resetting the indices from here would race with it. Whatever is
+// still counted as queued is simply played back as silence.
+void StereoResampler::MixerFifo::Clear() {
+	memset(m_buffer, 0, sizeof(m_buffer));
+}
+
+// Executed from sound stream thread.
+//
+// Resamples queued input with linear interpolation and ADDS it to what is already in `samples`
+// (StereoResampler::Mix zeroes the buffer before calling this).
+//   samples             - interleaved stereo output, numSamples * 2 shorts.
+//   numSamples          - number of stereo frames to produce.
+//   consider_framelimit - currently unused; the framelimit scaling below is commented out.
+//   sample_rate         - output rate in Hz. Used as a divisor, so it is assumed to be positive.
+// Returns numSamples. If the FIFO runs dry, the remainder is padded with the last frame.
+//
+// Channel order: PushSamples() stores its input linearly, so even slots of m_buffer hold the
+// first channel of each frame and odd slots the second. The output is written in that same
+// order, so a frame reaches the audio backend laid out exactly as it was pushed. (The previous
+// code wrote the two channels crossed over, which swapped left and right.)
+unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
+	unsigned int currentSample = 0;
+
+	// Cache access in non-volatile variable
+	// This is the only function changing the read value, so it's safe to
+	// cache it locally although it's written here.
+	// The writing pointer will be modified outside, but it will only increase,
+	// so we will just ignore new written data while interpolating.
+	// Without this cache, the compiler wouldn't be allowed to optimize the
+	// interpolation loop.
+	u32 indexR = Common::AtomicLoad(m_indexR);
+	u32 indexW = Common::AtomicLoad(m_indexW);
+
+	// Rate control: smooth the fill level (in frames) and nudge the input rate so that the
+	// FIFO hovers around LOW_WATERMARK. The nudge is limited to +/- MAX_FREQ_SHIFT Hz.
+	float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
+	m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG;
+	float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR;
+	if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
+	if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
+
+	// render numleft sample pairs to samples[]
+	// advance indexR with sample position
+	// remember fractional offset
+
+
+	float aid_sample_rate = m_input_sample_rate + offset;
+
+	/*
+	u32 framelimit = SConfig::GetInstance().m_Framelimit;
+	if (consider_framelimit && framelimit > 1) {
+		aid_sample_rate = aid_sample_rate * (framelimit - 1) * 5 / 59.994;
+	}*/
+
+	// 16.16 fixed-point step: input frames consumed per output frame.
+	const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)sample_rate);
+
+	s32 lvolume = m_LVolume;
+	s32 rvolume = m_RVolume;
+
+	// TODO: consider a higher-quality resampling algorithm.
+	// TODO: Add a fast path for 1:1.
+	// Needs more than one queued frame, since each output interpolates between two input frames.
+	for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
+		u32 indexR2 = indexR + 2; //next sample
+
+		// First channel: even input slot -> even output slot.
+		s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
+		s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
+		int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
+		sampleL = (sampleL * lvolume) >> 8;
+		sampleL += samples[currentSample];
+		MathUtil::Clamp(&sampleL, -32767, 32767);
+		samples[currentSample] = sampleL;
+
+		// Second channel: odd input slot -> odd output slot.
+		s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
+		s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
+		int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
+		sampleR = (sampleR * rvolume) >> 8;
+		sampleR += samples[currentSample + 1];
+		MathUtil::Clamp(&sampleR, -32767, 32767);
+		samples[currentSample + 1] = sampleR;
+
+		// Advance the read position by the integer part of the accumulated step.
+		m_frac += ratio;
+		indexR += 2 * (u16)(m_frac >> 16);
+		m_frac &= 0xffff;
+	}
+
+	// Padding with the last value to reduce clicking
+	// s[0] / s[1] are the first / second channel of the frame just before indexR.
+	short s[2];
+	s[0] = m_buffer[(indexR - 2) & INDEX_MASK];
+	s[1] = m_buffer[(indexR - 1) & INDEX_MASK];
+	s[0] = (s[0] * lvolume) >> 8;
+	s[1] = (s[1] * rvolume) >> 8;
+	for (; currentSample < numSamples * 2; currentSample += 2) {
+		int sampleL = s[0] + samples[currentSample];
+		MathUtil::Clamp(&sampleL, -32767, 32767);
+		samples[currentSample] = sampleL;
+		int sampleR = s[1] + samples[currentSample + 1];
+		MathUtil::Clamp(&sampleR, -32767, 32767);
+		samples[currentSample + 1] = sampleR;
+	}
+
+	// Flush cached variable
+	Common::AtomicStore(m_indexR, indexR);
+
+	return numSamples;
+}
+
+// Fills `samples` with num_samples stereo frames resampled to `sample_rate` Hz.
+// Returns whatever the FIFO mixer reports, or 0 when no output buffer is supplied.
+unsigned int StereoResampler::Mix(short* samples, unsigned int num_samples, bool consider_framelimit, int sample_rate) {
+	if (samples == nullptr) {
+		return 0;
+	}
+
+	lock_guard lk(m_csMixing);
+
+	// The FIFO mixer accumulates into the buffer, so start from silence (two shorts per frame).
+	const size_t bufferBytes = num_samples * 2 * sizeof(short);
+	memset(samples, 0, bufferBytes);
+
+	return m_dma_mixer.Mix(samples, num_samples, consider_framelimit, sample_rate);
+}
+
+// Queues num_samples stereo frames (num_samples * 2 values from `samples`), clamping each value
+// to 16 bits on the way in. Resampling itself happens later, in Mix().
+// If the FIFO lacks room for the whole block, the block is dropped silently.
+void StereoResampler::MixerFifo::PushSamples(const s32 *samples, unsigned int num_samples) {
+	// Only the write index is cached. The read index is re-read each time because the
+	// reader advances it concurrently.
+	const u32 indexW = Common::AtomicLoad(m_indexW);
+
+	// indexW == m_indexR means "empty", so the FIFO may never be filled completely.
+	const u32 queued = (indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK;
+	const u32 incoming = num_samples * 2;
+	if (incoming + queued >= MAX_SAMPLES * 2) {
+		return;
+	}
+
+	// All quantities below are counts of individual samples (one s32 in, one s16 out).
+	const u32 writePos = indexW & INDEX_MASK;
+	const u32 untilWrap = MAX_SAMPLES * 2 - writePos;
+	if (incoming > untilWrap) {
+		// The block straddles the end of the ring: fill up to the end, then continue at slot 0.
+		ClampBufferToS16(&m_buffer[writePos], samples, untilWrap);
+		ClampBufferToS16(&m_buffer[0], samples + untilWrap, incoming - untilWrap);
+	} else {
+		ClampBufferToS16(&m_buffer[writePos], samples, incoming);
+	}
+
+	// Publish the new data to the reader.
+	Common::AtomicAdd(m_indexW, num_samples * 2);
+}
+
+// Queues num_samples stereo frames of 32-bit mixed audio for the audio thread to consume.
+// The FIFO clamps the values to 16 bits while storing them.
+// The parameter is spelled s32 to match the declaration in StereoResampler.h, so the
+// definition no longer depends on s32 happening to be `int`.
+void StereoResampler::PushSamples(const s32 *samples, unsigned int num_samples) {
+	m_dma_mixer.PushSamples(samples, num_samples);
+}
+
+// Sets the rate (in Hz) at which pushed samples are assumed to have been produced.
+// Forwards to the FIFO mixer, which uses it as the base rate for its resampling ratio.
+void StereoResampler::SetDMAInputSampleRate(unsigned int rate) {
+	m_dma_mixer.SetInputSampleRate(rate);
+}
+
+// Stores the input sample rate. Mix() reads it when it computes the resampling ratio.
+// NOTE(review): the member is a plain (non-atomic) unsigned, while Mix() runs on the sound
+// stream thread; confirm which thread is expected to call this.
+void StereoResampler::MixerFifo::SetInputSampleRate(unsigned int rate) {
+	m_input_sample_rate = rate;
+}
+
+// Sets the per-channel gain that Mix() applies as (sample * volume) >> 8, so a stored value of
+// 256 is unity gain.
+// Adding (v >> 7) bumps inputs of 128..255 by one, so an input of 255 is stored as 256.
+// NOTE(review): this presumably expects inputs in the range 0-255; confirm with callers.
+void StereoResampler::MixerFifo::SetVolume(unsigned int lvolume, unsigned int rvolume)
+{
+	m_LVolume = lvolume + (lvolume >> 7);
+	m_RVolume = rvolume + (rvolume >> 7);
+}
+
+// Savestate hook. Opens the "resampler" section (the second argument is presumably the section
+// version) and stops there: no resampler state is read or written yet, so queued audio is not
+// carried across a save/load.
+void StereoResampler::DoState(PointerWrap &p) {
+	auto s = p.Section("resampler", 1);
+	// Nothing follows the section header, so there is nothing to skip if it is absent.
+	if (!s)
+		return;
+}
--- a/Core/HW/StereoResampler.h
+++ b/Core/HW/StereoResampler.h
@ -0,0 +1,110 @@
+// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Adapted from Dolphin.
+
+#pragma once
+
+#include <string>
+
+#include "base/mutex.h"
+
+#include "Common/ChunkFile.h"
+#include "Common/CommonTypes.h"
+
+// 16 bit Stereo
+// The millisecond figures below assume 32000 Hz input. StereoResampler constructs its mixer
+// with 44100 Hz, where the same frame counts are shorter.
+// MAX_SAMPLES is the FIFO capacity in stereo frames; the buffer holds MAX_SAMPLES * 2 shorts.
+#define MAX_SAMPLES     (1024 * 2) // 64 ms at 32000 Hz, about 46 ms at 44100 Hz
+// Wraps an index into the buffer; works because MAX_SAMPLES * 2 is a power of two.
+#define INDEX_MASK      (MAX_SAMPLES * 2 - 1)
+
+// Fill level (in stereo frames) that the rate control in MixerFifo::Mix steers towards.
+#define LOW_WATERMARK   1280 // 40 ms at 32000 Hz, about 29 ms at 44100 Hz
+// Largest correction, in Hz, that Mix() adds to or subtracts from the input sample rate.
+#define MAX_FREQ_SHIFT  200  // per 32000 Hz
+#define CONTROL_FACTOR  0.2f // in freq_shift per fifo size offset
+// Smoothing of the measured fill level: each Mix() call contributes 1/CONTROL_AVG to the average.
+#define CONTROL_AVG     32
+
+// Bridges the emulator thread, which pushes mixed 32-bit stereo audio, and the audio output
+// thread, which pulls 16-bit stereo audio at the backend's sample rate. The queue and the
+// linear-interpolation resampler live in the nested MixerFifo.
+class StereoResampler {
+
+public:
+	// m_sampleRate was previously left uninitialized, so GetSampleRate() returned an
+	// indeterminate value. It now starts at the same 44100 Hz as the mixer's input rate.
+	// NOTE(review): nothing visible here updates m_sampleRate afterwards; confirm whether it
+	// should track the backend rate passed to Mix().
+	StereoResampler()
+		: m_dma_mixer(this, 44100)
+		, m_sampleRate(44100)
+		, m_speed(1.0)
+	{
+	}
+
+	virtual ~StereoResampler() {}
+
+	// Called from audio threads
+	// Writes numSamples stereo frames to `samples`, resampled to sampleRate Hz.
+	virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sampleRate);
+
+	// Called from main thread
+	// This clamps the samples to 16-bit before starting to work on them.
+	virtual void PushSamples(const s32* samples, unsigned int num_samples);
+	unsigned int GetSampleRate() const { return m_sampleRate; }
+
+	void SetDMAInputSampleRate(unsigned int rate);
+
+	// Mutex that Mix() holds while it runs.
+	recursive_mutex& MixerCritical() { return m_csMixing; }
+
+	float GetCurrentSpeed() const { return m_speed; }
+	void UpdateSpeed(volatile float val) { m_speed = val; }
+
+	// Forwards to MixerFifo::Clear().
+	void Clear() {
+		m_dma_mixer.Clear();
+	}
+
+	void DoState(PointerWrap &p);
+
+protected:
+	// Ring buffer of interleaved 16-bit stereo samples with one producer (PushSamples) and one
+	// consumer (Mix). Indices increase monotonically and are wrapped with INDEX_MASK on access.
+	class MixerFifo {
+	public:
+		MixerFifo(StereoResampler *mixer, unsigned sample_rate)
+			: m_mixer(mixer)
+			, m_input_sample_rate(sample_rate)
+			, m_indexW(0)
+			, m_indexR(0)
+			, m_LVolume(256)
+			, m_RVolume(256)
+			, m_numLeftI(0.0f)
+			, m_frac(0)
+		{
+			memset(m_buffer, 0, sizeof(m_buffer));
+		}
+		void PushSamples(const s32* samples, unsigned int num_samples);
+		unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate);
+		void SetInputSampleRate(unsigned int rate);
+		void SetVolume(unsigned int lvolume, unsigned int rvolume);
+		void Clear();
+
+	private:
+		StereoResampler *m_mixer;      // Owning resampler; not used by the code visible here.
+		unsigned m_input_sample_rate;  // Rate of the pushed audio, in Hz.
+		short m_buffer[MAX_SAMPLES * 2];
+		volatile u32 m_indexW;         // Write index; advanced only by PushSamples().
+		volatile u32 m_indexR;         // Read index; advanced only by Mix().
+		// Volume ranges from 0-256
+		volatile s32 m_LVolume;
+		volatile s32 m_RVolume;
+		float m_numLeftI;              // Smoothed fill level in stereo frames, used for rate control.
+		u32 m_frac;                    // Fractional read position (low 16 bits) carried between Mix() calls.
+	};
+
+	MixerFifo m_dma_mixer;
+	unsigned int m_sampleRate;
+
+	recursive_mutex m_csMixing;
+
+	volatile float m_speed; // Current rate of the emulation (1.0 = 100% speed)
+};
--- a/UI/NativeApp.cpp
+++ b/UI/NativeApp.cpp
@ -226,7 +226,8 @@ std::string NativeQueryConfig(std::string query) {

 int NativeMix(short *audio, int num_samples) {
 	if (GetUIState() == UISTATE_INGAME) {
-		num_samples = __AudioMix(audio, num_samples);
+		int sample_rate = System_GetPropertyInt(SYSPROP_AUDIO_SAMPLE_RATE);
+		num_samples = __AudioMix(audio, num_samples, sample_rate > 0 ? sample_rate : 44100);
 	}	else {
 		MixBackgroundAudio(audio, num_samples);
 	}
@ -568,7 +569,7 @@ void NativeInitGraphics() {
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

 #ifdef _WIN32
-	DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 44100);
+	DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 48000);
 #endif
 }