From e312d6b5fd3407ab4c924f1a367d3fe5cede6bb5 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 11 Jan 2015 15:13:43 +0100 Subject: [PATCH] Replace outAudioQueue with Dolphin's resampler. --- CMakeLists.txt | 2 + Core/Core.vcxproj | 2 + Core/Core.vcxproj.filters | 6 ++ Core/HLE/__sceAudio.cpp | 118 +++------------------ Core/HLE/__sceAudio.h | 2 +- Core/HW/StereoResampler.cpp | 202 ++++++++++++++++++++++++++++++++++++ Core/HW/StereoResampler.h | 110 ++++++++++++++++++++ UI/NativeApp.cpp | 5 +- 8 files changed, 343 insertions(+), 104 deletions(-) create mode 100644 Core/HW/StereoResampler.cpp create mode 100644 Core/HW/StereoResampler.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6972eaf731..46d7cd292b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1276,6 +1276,8 @@ add_library(${CoreLibName} ${CoreLinkType} Core/HW/MemoryStick.h Core/HW/SasAudio.cpp Core/HW/SasAudio.h + Core/HW/StereoResampler.cpp + Core/HW/StereoResampler.h Core/Host.cpp Core/Host.h Core/Loaders.cpp diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index bac7ae807d..00d65c1f3f 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -274,6 +274,7 @@ + @@ -512,6 +513,7 @@ + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index bb2f85b6c5..8023fb8c7a 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -300,6 +300,9 @@ HW + + HW + Util @@ -775,6 +778,9 @@ HW + + HW + Util diff --git a/Core/HLE/__sceAudio.cpp b/Core/HLE/__sceAudio.cpp index d776e63f98..8b24bbd64e 100644 --- a/Core/HLE/__sceAudio.cpp +++ b/Core/HLE/__sceAudio.cpp @@ -35,7 +35,9 @@ #include "Core/HLE/sceAudio.h" #include "Core/HLE/sceKernel.h" #include "Core/HLE/sceKernelThread.h" +#include "Core/HW/StereoResampler.h" +StereoResampler resampler; // Should be used to lock anything related to the outAudioQueue. // atomic locks are used on the lock. TODO: make this lock-free @@ -67,14 +69,6 @@ static s32 *mixBuffer; static int chanQueueMaxSizeFactor; static int chanQueueMinSizeFactor; -// TODO: Need to replace this with something lockless. Mutexes in the audio pipeline -// is bad mojo. -FixedSizeQueue outAudioQueue; - -bool __gainAudioQueueLock(); -void __releaseAcquiredLock(); -void __blockForAudioQueueLock(); - static inline s16 adjustvolume(s16 sample, int vol) { #ifdef ARM register int r; @@ -181,9 +175,7 @@ void __AudioInit() { mixBuffer = new s32[hwBlockSize * 2]; memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32)); - __blockForAudioQueueLock(); - outAudioQueue.clear(); - __releaseAcquiredLock(); + resampler.Clear(); CoreTiming::RegisterMHzChangeCallback(&__AudioCPUMHzChange); } @@ -199,16 +191,14 @@ void __AudioDoState(PointerWrap &p) { p.Do(mixFrequency); - { - //block until a lock is achieved. Not a good idea at all, but - //can't think of a better one... - __blockForAudioQueueLock(); - + if (s >= 2) { + resampler.DoState(p); + } else { + // Only to preserve the previous file format. Might cause a slight audio glitch on upgrades? + FixedSizeQueue outAudioQueue; outAudioQueue.DoState(p); - //release the atomic lock - __releaseAcquiredLock(); - + resampler.Clear(); } int chanCount = ARRAY_SIZE(chans); @@ -358,28 +348,6 @@ void __AudioSetOutputFrequency(int freq) { mixFrequency = freq; } -inline void ClampBufferToS16(s16 *out, s32 *in, size_t size) { -#ifdef _M_SSE - // Size will always be 16-byte aligned as the hwBlockSize is. - while (size >= 8) { - __m128i in1 = _mm_loadu_si128((__m128i *)in); - __m128i in2 = _mm_loadu_si128((__m128i *)(in + 4)); - __m128i packed = _mm_packs_epi32(in1, in2); - _mm_storeu_si128((__m128i *)out, packed); - out += 8; - in += 8; - size -= 8; - } - for (size_t i = 0; i < size; i++) { - out[i] = clamp_s16(in[i]); - } -#else - for (size_t i = 0; i < size; i++) { - out[i] = clamp_s16(in[i]); - } -#endif -} - // Mix samples from the various audio channels into a single sample queue. // This single sample queue is where __AudioMix should read from. If the sample queue is full, we should // just sleep the main emulator thread a little. @@ -433,35 +401,17 @@ void __AudioUpdate() { } if (g_Config.bEnableSound) { - - __blockForAudioQueueLock(); - /* - if (!__gainAudioQueueLock()){ - return; - } - */ - - if (outAudioQueue.room() >= hwBlockSize * 2) { - s16 *buf1 = 0, *buf2 = 0; - size_t sz1, sz2; - outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2); - ClampBufferToS16(buf1, mixBuffer, sz1); - if (buf2) { - ClampBufferToS16(buf2, mixBuffer + sz1, sz2); - } - } else { - // This happens quite a lot. There's still something slightly off - // about the amount of audio we produce. - } - //release the atomic lock - __releaseAcquiredLock(); + resampler.PushSamples(mixBuffer, hwBlockSize); } } // numFrames is number of stereo frames. // This is called from *outside* the emulator thread. -int __AudioMix(short *outstereo, int numFrames) -{ +int __AudioMix(short *outstereo, int numFrames, int sampleRate) { + resampler.Mix(outstereo, numFrames, false, sampleRate); + return numFrames; + + /* // TODO: if mixFrequency != the actual output frequency, resample! int underrun = -1; s16 sampleL = 0; @@ -476,6 +426,7 @@ int __AudioMix(short *outstereo, int numFrames) return 0; } + resampler.Mix(outstereo, numFrames); outAudioQueue.popPointers(numFrames * 2, &buf1, &sz1, &buf2, &sz2); memcpy(outstereo, buf1, sz1 * sizeof(s16)); @@ -496,40 +447,5 @@ int __AudioMix(short *outstereo, int numFrames) VERBOSE_LOG(SCEAUDIO, "Audio out buffer UNDERRUN at %i of %i", underrun, numFrames); } return underrun >= 0 ? underrun : numFrames; -} - - - -/*returns whether the lock was successfully gained or not. -i.e - whether the lock belongs to you -*/ -inline bool __gainAudioQueueLock(){ - if (g_Config.bAtomicAudioLocks){ - /*if the previous state was 0, that means the lock was "unlocked". So, - we return !0, which is true thanks to C's int to bool conversion - - One the other hand, if it was locked, then the lock would return 1. - so, !1 = 0 = false. - */ - return atomicLock_.test_and_set() == 0; - } else { - mutex_.lock(); - return true; - } -}; - -inline void __releaseAcquiredLock(){ - if (g_Config.bAtomicAudioLocks){ - atomicLock_.clear(); - } else { - mutex_.unlock(); - } -} - -inline void __blockForAudioQueueLock(){ - if (g_Config.bAtomicAudioLocks){ - while ((atomicLock_.test_and_set() == 0)){ } - } else { - mutex_.lock(); - } + */ } diff --git a/Core/HLE/__sceAudio.h b/Core/HLE/__sceAudio.h index 403c2f961f..9bbd9d9090 100644 --- a/Core/HLE/__sceAudio.h +++ b/Core/HLE/__sceAudio.h @@ -32,4 +32,4 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking); void __AudioWakeThreads(AudioChannel &chan, int result, int step); void __AudioWakeThreads(AudioChannel &chan, int result); -int __AudioMix(short *outstereo, int numSamples); +int __AudioMix(short *outstereo, int numSamples, int sampleRate); diff --git a/Core/HW/StereoResampler.cpp b/Core/HW/StereoResampler.cpp new file mode 100644 index 0000000000..23f6aa52d7 --- /dev/null +++ b/Core/HW/StereoResampler.cpp @@ -0,0 +1,202 @@ +// Copyright (c) 2015- PPSSPP Project and Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +// Adapted from Dolphin. + +#include + +#include "base/logging.h" +#include "Common/ChunkFile.h" +#include "Common/MathUtil.h" +#include "Common/Atomics.h" +#include "Core/HW/StereoResampler.h" +#include "Globals.h" + +#ifdef _M_SSE +#include +#endif + +inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) { +#ifdef _M_SSE + // Size will always be 16-byte aligned as the hwBlockSize is. + while (size >= 8) { + __m128i in1 = _mm_loadu_si128((__m128i *)in); + __m128i in2 = _mm_loadu_si128((__m128i *)(in + 4)); + __m128i packed = _mm_packs_epi32(in1, in2); + _mm_storeu_si128((__m128i *)out, packed); + out += 8; + in += 8; + size -= 8; + } + for (size_t i = 0; i < size; i++) { + out[i] = clamp_s16(in[i]); + } +#else + for (size_t i = 0; i < size; i++) { + out[i] = clamp_s16(in[i]); + } +#endif +} + +void StereoResampler::MixerFifo::Clear() { + // TODO +} + +// Executed from sound stream thread +unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) { + unsigned int currentSample = 0; + + // Cache access in non-volatile variable + // This is the only function changing the read value, so it's safe to + // cache it locally although it's written here. + // The writing pointer will be modified outside, but it will only increase, + // so we will just ignore new written data while interpolating. + // Without this cache, the compiler wouldn't be allowed to optimize the + // interpolation loop. + u32 indexR = Common::AtomicLoad(m_indexR); + u32 indexW = Common::AtomicLoad(m_indexW); + + float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2); + m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG; + float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR; + if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT; + if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT; + + // render numleft sample pairs to samples[] + // advance indexR with sample position + // remember fractional offset + + + float aid_sample_rate = m_input_sample_rate + offset; + + /* + u32 framelimit = SConfig::GetInstance().m_Framelimit; + if (consider_framelimit && framelimit > 1) { + aid_sample_rate = aid_sample_rate * (framelimit - 1) * 5 / 59.994; + }*/ + + const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)sample_rate); + + s32 lvolume = m_LVolume; + s32 rvolume = m_RVolume; + + // TODO: consider a higher-quality resampling algorithm. + // TODO: Add a fast path for 1:1. + for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) { + u32 indexR2 = indexR + 2; //next sample + + s16 l1 = m_buffer[indexR & INDEX_MASK]; //current + s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next + int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16; + sampleL = (sampleL * lvolume) >> 8; + sampleL += samples[currentSample + 1]; + MathUtil::Clamp(&sampleL, -32767, 32767); + samples[currentSample + 1] = sampleL; + + s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current + s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next + int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16; + sampleR = (sampleR * rvolume) >> 8; + sampleR += samples[currentSample]; + MathUtil::Clamp(&sampleR, -32767, 32767); + samples[currentSample] = sampleR; + + m_frac += ratio; + indexR += 2 * (u16)(m_frac >> 16); + m_frac &= 0xffff; + } + + // Padding with the last value to reduce clicking + short s[2]; + s[0] = m_buffer[(indexR - 1) & INDEX_MASK]; + s[1] = m_buffer[(indexR - 2) & INDEX_MASK]; + s[0] = (s[0] * rvolume) >> 8; + s[1] = (s[1] * lvolume) >> 8; + for (; currentSample < numSamples * 2; currentSample += 2) { + int sampleR = s[0] + samples[currentSample]; + MathUtil::Clamp(&sampleR, -32767, 32767); + samples[currentSample] = sampleR; + int sampleL = s[1] + samples[currentSample + 1]; + MathUtil::Clamp(&sampleL, -32767, 32767); + samples[currentSample + 1] = sampleL; + } + + // Flush cached variable + Common::AtomicStore(m_indexR, indexR); + + return numSamples; +} + +unsigned int StereoResampler::Mix(short* samples, unsigned int num_samples, bool consider_framelimit, int sample_rate) { + if (!samples) + return 0; + + lock_guard lk(m_csMixing); + memset(samples, 0, num_samples * 2 * sizeof(short)); + return m_dma_mixer.Mix(samples, num_samples, consider_framelimit, sample_rate); +} + +void StereoResampler::MixerFifo::PushSamples(const s32 *samples, unsigned int num_samples) { + // Cache access in non-volatile variable + // indexR isn't allowed to cache in the audio throttling loop as it + // needs to get updates to not deadlock. + u32 indexW = Common::AtomicLoad(m_indexW); + + // Check if we have enough free space + // indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW + if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2) + return; + + // AyuanX: Actual re-sampling work has been moved to sound thread + // to alleviate the workload on main thread + // and we simply store raw data here to make fast mem copy + int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short); + if (over_bytes > 0) { + ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, (num_samples * 4 - over_bytes) / 2); + ClampBufferToS16(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes / 2); + } else { + ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 2); + } + + Common::AtomicAdd(m_indexW, num_samples * 2); + + return; +} + +void StereoResampler::PushSamples(const int *samples, unsigned int num_samples) { + m_dma_mixer.PushSamples(samples, num_samples); +} + +void StereoResampler::SetDMAInputSampleRate(unsigned int rate) { + m_dma_mixer.SetInputSampleRate(rate); +} + +void StereoResampler::MixerFifo::SetInputSampleRate(unsigned int rate) { + m_input_sample_rate = rate; +} + +void StereoResampler::MixerFifo::SetVolume(unsigned int lvolume, unsigned int rvolume) +{ + m_LVolume = lvolume + (lvolume >> 7); + m_RVolume = rvolume + (rvolume >> 7); +} + +void StereoResampler::DoState(PointerWrap &p) { + auto s = p.Section("resampler", 1); + if (!s) + return; +} diff --git a/Core/HW/StereoResampler.h b/Core/HW/StereoResampler.h new file mode 100644 index 0000000000..def881eb37 --- /dev/null +++ b/Core/HW/StereoResampler.h @@ -0,0 +1,110 @@ +// Copyright (c) 2015- PPSSPP Project and Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +// Adapted from Dolphin. + +#pragma once + +#include + +#include "base/mutex.h" + +#include "Common/ChunkFile.h" +#include "Common/CommonTypes.h" + +// 16 bit Stereo +#define MAX_SAMPLES (1024 * 2) // 64ms +#define INDEX_MASK (MAX_SAMPLES * 2 - 1) + +#define LOW_WATERMARK 1280 // 40 ms +#define MAX_FREQ_SHIFT 200 // per 32000 Hz +#define CONTROL_FACTOR 0.2f // in freq_shift per fifo size offset +#define CONTROL_AVG 32 + +class StereoResampler { + +public: + StereoResampler() + : m_dma_mixer(this, 44100) + , m_speed(1.0) + { + } + + virtual ~StereoResampler() {} + + // Called from audio threads + virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sampleRate); + + // Called from main thread + // This clamps the samples to 16-bit before starting to work on them. + virtual void PushSamples(const s32* samples, unsigned int num_samples); + unsigned int GetSampleRate() const { return m_sampleRate; } + + void SetDMAInputSampleRate(unsigned int rate); + + recursive_mutex& MixerCritical() { return m_csMixing; } + + float GetCurrentSpeed() const { return m_speed; } + void UpdateSpeed(volatile float val) { m_speed = val; } + + void Clear() { + m_dma_mixer.Clear(); + } + + void DoState(PointerWrap &p); + +protected: + class MixerFifo { + public: + MixerFifo(StereoResampler *mixer, unsigned sample_rate) + : m_mixer(mixer) + , m_input_sample_rate(sample_rate) + , m_indexW(0) + , m_indexR(0) + , m_LVolume(256) + , m_RVolume(256) + , m_numLeftI(0.0f) + , m_frac(0) + { + memset(m_buffer, 0, sizeof(m_buffer)); + } + void PushSamples(const s32* samples, unsigned int num_samples); + unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate); + void SetInputSampleRate(unsigned int rate); + void SetVolume(unsigned int lvolume, unsigned int rvolume); + void Clear(); + + private: + StereoResampler *m_mixer; + unsigned m_input_sample_rate; + short m_buffer[MAX_SAMPLES * 2]; + volatile u32 m_indexW; + volatile u32 m_indexR; + // Volume ranges from 0-256 + volatile s32 m_LVolume; + volatile s32 m_RVolume; + float m_numLeftI; + u32 m_frac; + }; + + MixerFifo m_dma_mixer; + unsigned int m_sampleRate; + + recursive_mutex m_csMixing; + + volatile float m_speed; // Current rate of the emulation (1.0 = 100% speed) +}; diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp index ddcfec695d..3098912dd8 100644 --- a/UI/NativeApp.cpp +++ b/UI/NativeApp.cpp @@ -226,7 +226,8 @@ std::string NativeQueryConfig(std::string query) { int NativeMix(short *audio, int num_samples) { if (GetUIState() == UISTATE_INGAME) { - num_samples = __AudioMix(audio, num_samples); + int sample_rate = System_GetPropertyInt(SYSPROP_AUDIO_SAMPLE_RATE); + num_samples = __AudioMix(audio, num_samples, sample_rate > 0 ? sample_rate : 44100); } else { MixBackgroundAudio(audio, num_samples); } @@ -568,7 +569,7 @@ void NativeInitGraphics() { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); #ifdef _WIN32 - DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 44100); + DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 48000); #endif }