diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6972eaf731..46d7cd292b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1276,6 +1276,8 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/HW/MemoryStick.h
Core/HW/SasAudio.cpp
Core/HW/SasAudio.h
+ Core/HW/StereoResampler.cpp
+ Core/HW/StereoResampler.h
Core/Host.cpp
Core/Host.h
Core/Loaders.cpp
diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj
index bac7ae807d..00d65c1f3f 100644
--- a/Core/Core.vcxproj
+++ b/Core/Core.vcxproj
@@ -274,6 +274,7 @@
+
@@ -512,6 +513,7 @@
+
diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters
index bb2f85b6c5..8023fb8c7a 100644
--- a/Core/Core.vcxproj.filters
+++ b/Core/Core.vcxproj.filters
@@ -300,6 +300,9 @@
HW
+
+ HW
+
Util
@@ -775,6 +778,9 @@
HW
+
+ HW
+
Util
diff --git a/Core/HLE/__sceAudio.cpp b/Core/HLE/__sceAudio.cpp
index d776e63f98..8b24bbd64e 100644
--- a/Core/HLE/__sceAudio.cpp
+++ b/Core/HLE/__sceAudio.cpp
@@ -35,7 +35,9 @@
#include "Core/HLE/sceAudio.h"
#include "Core/HLE/sceKernel.h"
#include "Core/HLE/sceKernelThread.h"
+#include "Core/HW/StereoResampler.h"
+StereoResampler resampler; // Filled by __AudioUpdate (PushSamples) and drained by __AudioMix (Mix).
// Should be used to lock anything related to the outAudioQueue.
// atomic locks are used on the lock. TODO: make this lock-free
@@ -67,14 +69,6 @@ static s32 *mixBuffer;
static int chanQueueMaxSizeFactor;
static int chanQueueMinSizeFactor;
-// TODO: Need to replace this with something lockless. Mutexes in the audio pipeline
-// is bad mojo.
-FixedSizeQueue outAudioQueue;
-
-bool __gainAudioQueueLock();
-void __releaseAcquiredLock();
-void __blockForAudioQueueLock();
-
static inline s16 adjustvolume(s16 sample, int vol) {
#ifdef ARM
register int r;
@@ -181,9 +175,7 @@ void __AudioInit() {
mixBuffer = new s32[hwBlockSize * 2];
memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32));
- __blockForAudioQueueLock();
- outAudioQueue.clear();
- __releaseAcquiredLock();
+ resampler.Clear();
CoreTiming::RegisterMHzChangeCallback(&__AudioCPUMHzChange);
}
@@ -199,16 +191,14 @@ void __AudioDoState(PointerWrap &p) {
p.Do(mixFrequency);
- {
- //block until a lock is achieved. Not a good idea at all, but
- //can't think of a better one...
- __blockForAudioQueueLock();
-
+ if (s >= 2) {
+ resampler.DoState(p);
+ } else {
+ // Only to preserve the previous file format. Might cause a slight audio glitch on upgrades?
+ FixedSizeQueue outAudioQueue;
outAudioQueue.DoState(p);
- //release the atomic lock
- __releaseAcquiredLock();
-
+ resampler.Clear();
}
int chanCount = ARRAY_SIZE(chans);
@@ -358,28 +348,6 @@ void __AudioSetOutputFrequency(int freq) {
mixFrequency = freq;
}
-inline void ClampBufferToS16(s16 *out, s32 *in, size_t size) {
-#ifdef _M_SSE
- // Size will always be 16-byte aligned as the hwBlockSize is.
- while (size >= 8) {
- __m128i in1 = _mm_loadu_si128((__m128i *)in);
- __m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
- __m128i packed = _mm_packs_epi32(in1, in2);
- _mm_storeu_si128((__m128i *)out, packed);
- out += 8;
- in += 8;
- size -= 8;
- }
- for (size_t i = 0; i < size; i++) {
- out[i] = clamp_s16(in[i]);
- }
-#else
- for (size_t i = 0; i < size; i++) {
- out[i] = clamp_s16(in[i]);
- }
-#endif
-}
-
// Mix samples from the various audio channels into a single sample queue.
// This single sample queue is where __AudioMix should read from. If the sample queue is full, we should
// just sleep the main emulator thread a little.
@@ -433,35 +401,17 @@ void __AudioUpdate() {
}
if (g_Config.bEnableSound) {
-
- __blockForAudioQueueLock();
- /*
- if (!__gainAudioQueueLock()){
- return;
- }
- */
-
- if (outAudioQueue.room() >= hwBlockSize * 2) {
- s16 *buf1 = 0, *buf2 = 0;
- size_t sz1, sz2;
- outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2);
- ClampBufferToS16(buf1, mixBuffer, sz1);
- if (buf2) {
- ClampBufferToS16(buf2, mixBuffer + sz1, sz2);
- }
- } else {
- // This happens quite a lot. There's still something slightly off
- // about the amount of audio we produce.
- }
- //release the atomic lock
- __releaseAcquiredLock();
+ resampler.PushSamples(mixBuffer, hwBlockSize);
}
}
// numFrames is number of stereo frames.
// This is called from *outside* the emulator thread.
-int __AudioMix(short *outstereo, int numFrames)
-{
+int __AudioMix(short *outstereo, int numFrames, int sampleRate) {
+ resampler.Mix(outstereo, numFrames, false, sampleRate);
+ return numFrames;
+
+ /*
// TODO: if mixFrequency != the actual output frequency, resample!
int underrun = -1;
s16 sampleL = 0;
@@ -476,6 +426,7 @@ int __AudioMix(short *outstereo, int numFrames)
return 0;
}
+ resampler.Mix(outstereo, numFrames);
outAudioQueue.popPointers(numFrames * 2, &buf1, &sz1, &buf2, &sz2);
memcpy(outstereo, buf1, sz1 * sizeof(s16));
@@ -496,40 +447,5 @@ int __AudioMix(short *outstereo, int numFrames)
VERBOSE_LOG(SCEAUDIO, "Audio out buffer UNDERRUN at %i of %i", underrun, numFrames);
}
return underrun >= 0 ? underrun : numFrames;
-}
-
-
-
-/*returns whether the lock was successfully gained or not.
-i.e - whether the lock belongs to you
-*/
-inline bool __gainAudioQueueLock(){
- if (g_Config.bAtomicAudioLocks){
- /*if the previous state was 0, that means the lock was "unlocked". So,
- we return !0, which is true thanks to C's int to bool conversion
-
- One the other hand, if it was locked, then the lock would return 1.
- so, !1 = 0 = false.
- */
- return atomicLock_.test_and_set() == 0;
- } else {
- mutex_.lock();
- return true;
- }
-};
-
-inline void __releaseAcquiredLock(){
- if (g_Config.bAtomicAudioLocks){
- atomicLock_.clear();
- } else {
- mutex_.unlock();
- }
-}
-
-inline void __blockForAudioQueueLock(){
- if (g_Config.bAtomicAudioLocks){
- while ((atomicLock_.test_and_set() == 0)){ }
- } else {
- mutex_.lock();
- }
+ */
}
diff --git a/Core/HLE/__sceAudio.h b/Core/HLE/__sceAudio.h
index 403c2f961f..9bbd9d9090 100644
--- a/Core/HLE/__sceAudio.h
+++ b/Core/HLE/__sceAudio.h
@@ -32,4 +32,4 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking);
void __AudioWakeThreads(AudioChannel &chan, int result, int step);
void __AudioWakeThreads(AudioChannel &chan, int result);
-int __AudioMix(short *outstereo, int numSamples);
+int __AudioMix(short *outstereo, int numSamples, int sampleRate);
diff --git a/Core/HW/StereoResampler.cpp b/Core/HW/StereoResampler.cpp
new file mode 100644
index 0000000000..23f6aa52d7
--- /dev/null
+++ b/Core/HW/StereoResampler.cpp
@@ -0,0 +1,202 @@
+// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Adapted from Dolphin.
+
+#include <string.h>
+
+#include "base/logging.h"
+#include "Common/ChunkFile.h"
+#include "Common/MathUtil.h"
+#include "Common/Atomics.h"
+#include "Core/HW/StereoResampler.h"
+#include "Globals.h"
+
+#ifdef _M_SSE
+#include <emmintrin.h>
+#endif
+
+// Converts size 32-bit samples from in[] to 16-bit samples in out[].
+// When _M_SSE is defined, groups of eight samples are narrowed with
+// _mm_packs_epi32 (signed saturation); whatever is left over, or the whole
+// buffer without SSE, is converted one sample at a time through clamp_s16().
+inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) {
+#ifdef _M_SSE
+	// Unaligned loads and stores are used, so no pointer alignment is required.
+	for (; size >= 8; size -= 8, in += 8, out += 8) {
+		const __m128i lo = _mm_loadu_si128((const __m128i *)in);
+		const __m128i hi = _mm_loadu_si128((const __m128i *)(in + 4));
+		// Pack both halves into eight s16 values and store them.
+		_mm_storeu_si128((__m128i *)out, _mm_packs_epi32(lo, hi));
+	}
+#endif
+
+	// Scalar path: the tail after the SIMD loop, or everything without SSE.
+	const s32 *const end = in + size;
+	while (in != end) {
+		*out++ = clamp_s16(*in++);
+	}
+}
+
+void StereoResampler::MixerFifo::Clear() {
+	memset(m_buffer, 0, sizeof(m_buffer)); // Silence whatever is queued; the read/write indices are left untouched.
+}
+
+// Executed from the sound stream thread.
+// Resamples the buffered input with linear interpolation and adds numSamples
+// stereo frames into samples[], keeping the channel order of the pushed data.
+// When the FIFO runs dry, the remainder is padded with the frame just before
+// the read position. Always returns numSamples.
+unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
+	unsigned int currentSample = 0;
+
+	// Cache access in non-volatile variable
+	// This is the only function changing the read value, so it's safe to
+	// cache it locally although it's written here.
+	// The writing pointer will be modified outside, but it will only increase,
+	// so we will just ignore new written data while interpolating.
+	// Without this cache, the compiler wouldn't be allowed to optimize the
+	// interpolation loop.
+	u32 indexR = Common::AtomicLoad(m_indexR);
+	u32 indexW = Common::AtomicLoad(m_indexW);
+
+	// Nudge the input rate by a smoothed measure of how far the fill level is
+	// from LOW_WATERMARK, limited to +/- MAX_FREQ_SHIFT.
+	float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
+	m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG;
+	float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR;
+	if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
+	if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
+
+	// NOTE: Dolphin's framelimit-based rate scaling was not ported, which is why
+	// consider_framelimit has no effect here.
+	float aid_sample_rate = m_input_sample_rate + offset;
+
+	// 16.16 fixed-point read step per output frame.
+	const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)sample_rate);
+
+	s32 lvolume = m_LVolume;
+	s32 rvolume = m_RVolume;
+
+	// TODO: consider a higher-quality resampling algorithm.
+	// TODO: Add a fast path for 1:1.
+	for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
+		u32 indexR2 = indexR + 2; //next sample
+
+		// The first sample of each input frame goes to the first sample of the
+		// output frame, so the two channels are not swapped on the way out.
+		s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
+		s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
+		int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
+		sampleL = (sampleL * lvolume) >> 8;
+		sampleL += samples[currentSample];
+		MathUtil::Clamp(&sampleL, -32767, 32767);
+		samples[currentSample] = sampleL;
+
+		s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
+		s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
+		int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
+		sampleR = (sampleR * rvolume) >> 8;
+		sampleR += samples[currentSample + 1];
+		MathUtil::Clamp(&sampleR, -32767, 32767);
+		samples[currentSample + 1] = sampleR;
+
+		m_frac += ratio;
+		indexR += 2 * (u16)(m_frac >> 16);
+		m_frac &= 0xffff;
+	}
+
+	// Padding with the last value to reduce clicking
+	short s[2];
+	s[0] = m_buffer[(indexR - 2) & INDEX_MASK];
+	s[1] = m_buffer[(indexR - 1) & INDEX_MASK];
+	s[0] = (s[0] * lvolume) >> 8;
+	s[1] = (s[1] * rvolume) >> 8;
+	for (; currentSample < numSamples * 2; currentSample += 2) {
+		int sampleL = s[0] + samples[currentSample];
+		MathUtil::Clamp(&sampleL, -32767, 32767);
+		samples[currentSample] = sampleL;
+		int sampleR = s[1] + samples[currentSample + 1];
+		MathUtil::Clamp(&sampleR, -32767, 32767);
+		samples[currentSample + 1] = sampleR;
+	}
+
+	// Flush cached variable
+	Common::AtomicStore(m_indexR, indexR);
+
+	return numSamples;
+}
+// Zeroes num_samples stereo frames in samples[] and then lets the FIFO mix into
+// them, all while holding m_csMixing. Returns 0 untouched if samples is null.
+unsigned int StereoResampler::Mix(short* samples, unsigned int num_samples, bool consider_framelimit, int sample_rate) {
+	if (samples == nullptr) return 0;
+	lock_guard lk(m_csMixing);
+	// The FIFO adds into the buffer, so it has to start out silent.
+	memset(samples, 0, num_samples * 2 * sizeof(short));
+	return m_dma_mixer.Mix(samples, num_samples, consider_framelimit, sample_rate);
+}
+// Clamps num_samples stereo frames of 32-bit input to 16 bits and appends them
+// to the ring buffer; the whole push is dropped when there is not enough room.
+void StereoResampler::MixerFifo::PushSamples(const s32 *samples, unsigned int num_samples) {
+	// Cache access in non-volatile variable.
+	// The read index is deliberately not cached: it is loaded fresh below
+	// because the mixing side keeps advancing it.
+	const u32 writeIndex = Common::AtomicLoad(m_indexW);
+
+	// Check if we have enough free space
+	// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
+	const u32 queued = (writeIndex - Common::AtomicLoad(m_indexR)) & INDEX_MASK;
+	if (num_samples * 2 + queued >= MAX_SAMPLES * 2)
+		return;
+
+	// Resampling is done on the sound thread; here the data is only clamped and
+	// stored, in two pieces when the write position wraps past the buffer end.
+	const u32 writePos = writeIndex & INDEX_MASK;
+	const u32 untilWrap = MAX_SAMPLES * 2 - writePos;
+	if (num_samples * 2 > untilWrap) {
+		ClampBufferToS16(&m_buffer[writePos], samples, untilWrap);
+		ClampBufferToS16(&m_buffer[0], samples + untilWrap, num_samples * 2 - untilWrap);
+	} else {
+		ClampBufferToS16(&m_buffer[writePos], samples, num_samples * 2);
+	}
+
+	Common::AtomicAdd(m_indexW, num_samples * 2);
+}
+// Forwards the samples to the FIFO, which clamps them to 16-bit. Parameter type spelled as s32 to match the declaration.
+void StereoResampler::PushSamples(const s32 *samples, unsigned int num_samples) {
+	m_dma_mixer.PushSamples(samples, num_samples);
+}
+// Forwards the new input sample rate to the FIFO (m_dma_mixer).
+void StereoResampler::SetDMAInputSampleRate(unsigned int rate) {
+ m_dma_mixer.SetInputSampleRate(rate);
+}
+// Sets the rate that Mix() assumes for the buffered input when it computes its resampling ratio.
+void StereoResampler::MixerFifo::SetInputSampleRate(unsigned int rate) {
+ m_input_sample_rate = rate;
+}
+
+// Stores each gain as v + (v >> 7); Mix() applies it as (sample * gain) >> 8.
+void StereoResampler::MixerFifo::SetVolume(unsigned int lvolume, unsigned int rvolume) {
+	m_LVolume = lvolume + (lvolume >> 7);
+	m_RVolume = rvolume + (rvolume >> 7);
+}
+// Opens the "resampler" savestate section (version 1); no fields are saved or loaded inside it yet.
+void StereoResampler::DoState(PointerWrap &p) {
+ auto s = p.Section("resampler", 1);
+ if (!s)
+ return;
+}
diff --git a/Core/HW/StereoResampler.h b/Core/HW/StereoResampler.h
new file mode 100644
index 0000000000..def881eb37
--- /dev/null
+++ b/Core/HW/StereoResampler.h
@@ -0,0 +1,110 @@
+// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Adapted from Dolphin.
+
+#pragma once
+
+#include <string.h>
+
+#include "base/mutex.h"
+
+#include "Common/ChunkFile.h"
+#include "Common/CommonTypes.h"
+
+// 16 bit Stereo
+#define MAX_SAMPLES (1024 * 2) // stereo frames: 64 ms at 32000 Hz, about 46 ms at 44100 Hz
+#define INDEX_MASK (MAX_SAMPLES * 2 - 1) // m_buffer holds MAX_SAMPLES * 2 shorts; masking needs a power-of-two size
+
+#define LOW_WATERMARK 1280 // stereo frames: 40 ms at 32000 Hz, about 29 ms at 44100 Hz
+#define MAX_FREQ_SHIFT 200 // per 32000 Hz
+#define CONTROL_FACTOR 0.2f // in freq_shift per fifo size offset
+#define CONTROL_AVG 32 // averaging length for the smoothed fill level computed in Mix()
+
+// Queues audio pushed by the emulator and resamples it to the rate requested
+// by the caller of Mix(). Adapted from Dolphin.
+class StereoResampler {
+public:
+	StereoResampler()
+		: m_dma_mixer(this, 44100)
+		, m_sampleRate(44100) // Same default as the FIFO input rate, so GetSampleRate() never reads an uninitialized value.
+		, m_speed(1.0)
+	{}
+
+	virtual ~StereoResampler() {}
+
+	// Called from audio threads
+	virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sampleRate);
+
+	// Called from main thread
+	// This clamps the samples to 16-bit before starting to work on them.
+	virtual void PushSamples(const s32* samples, unsigned int num_samples);
+	unsigned int GetSampleRate() const { return m_sampleRate; }
+
+	void SetDMAInputSampleRate(unsigned int rate);
+
+	recursive_mutex& MixerCritical() { return m_csMixing; }
+
+	float GetCurrentSpeed() const { return m_speed; }
+	void UpdateSpeed(volatile float val) { m_speed = val; }
+
+	void Clear() { m_dma_mixer.Clear(); }
+
+	void DoState(PointerWrap &p);
+
+protected:
+	// Ring buffer of interleaved 16-bit stereo samples: PushSamples() writes, Mix() reads.
+	class MixerFifo {
+	public:
+		MixerFifo(StereoResampler *mixer, unsigned sample_rate)
+			: m_mixer(mixer)
+			, m_input_sample_rate(sample_rate)
+			, m_indexW(0)
+			, m_indexR(0)
+			, m_LVolume(256)
+			, m_RVolume(256)
+			, m_numLeftI(0.0f)
+			, m_frac(0)
+		{
+			memset(m_buffer, 0, sizeof(m_buffer));
+		}
+		void PushSamples(const s32* samples, unsigned int num_samples);
+		unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate);
+		void SetInputSampleRate(unsigned int rate);
+		void SetVolume(unsigned int lvolume, unsigned int rvolume);
+		void Clear();
+
+	private:
+		StereoResampler *m_mixer;
+		unsigned m_input_sample_rate;
+		short m_buffer[MAX_SAMPLES * 2];
+		volatile u32 m_indexW; // Write index; only ever increased, masked with INDEX_MASK on use.
+		volatile u32 m_indexR; // Read index; advanced by Mix().
+		// Volume ranges from 0-256
+		volatile s32 m_LVolume;
+		volatile s32 m_RVolume;
+		float m_numLeftI; // Smoothed fill level, in stereo frames.
+		u32 m_frac; // Fractional read position, 16 bits.
+	};
+
+	MixerFifo m_dma_mixer;
+	unsigned int m_sampleRate;
+
+	recursive_mutex m_csMixing;
+
+	volatile float m_speed; // Current rate of the emulation (1.0 = 100% speed)
+};
diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp
index ddcfec695d..3098912dd8 100644
--- a/UI/NativeApp.cpp
+++ b/UI/NativeApp.cpp
@@ -226,7 +226,8 @@ std::string NativeQueryConfig(std::string query) {
int NativeMix(short *audio, int num_samples) {
if (GetUIState() == UISTATE_INGAME) {
- num_samples = __AudioMix(audio, num_samples);
+ int sample_rate = System_GetPropertyInt(SYSPROP_AUDIO_SAMPLE_RATE);
+ num_samples = __AudioMix(audio, num_samples, sample_rate > 0 ? sample_rate : 44100);
} else {
MixBackgroundAudio(audio, num_samples);
}
@@ -568,7 +569,7 @@ void NativeInitGraphics() {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
#ifdef _WIN32
- DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 44100);
+ DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 48000);
#endif
}