Replace outAudioQueue with Dolphin's resampler.

This commit is contained in:
Henrik Rydgard 2015-01-11 15:13:43 +01:00
parent 1b055fd07e
commit e312d6b5fd
8 changed files with 343 additions and 104 deletions

View file

@ -1276,6 +1276,8 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/HW/MemoryStick.h
Core/HW/SasAudio.cpp
Core/HW/SasAudio.h
Core/HW/StereoResampler.cpp
Core/HW/StereoResampler.h
Core/Host.cpp
Core/Host.h
Core/Loaders.cpp

View file

@ -274,6 +274,7 @@
<ClCompile Include="HW\SasAudio.cpp" />
<ClCompile Include="HW\AsyncIOManager.cpp" />
<ClCompile Include="HW\SimpleAudioDec.cpp" />
<ClCompile Include="HW\StereoResampler.cpp" />
<ClCompile Include="Loaders.cpp" />
<ClCompile Include="MemMap.cpp" />
<ClCompile Include="MemmapFunctions.cpp" />
@ -512,6 +513,7 @@
<ClInclude Include="HW\MemoryStick.h" />
<ClInclude Include="HW\AsyncIOManager.h" />
<ClInclude Include="HW\SimpleAudioDec.h" />
<ClInclude Include="HW\StereoResampler.h" />
<ClInclude Include="Loaders.h" />
<ClInclude Include="MemMap.h" />
<ClInclude Include="MIPS\ARM\ArmAsm.h">

View file

@ -300,6 +300,9 @@
<ClCompile Include="HW\MediaEngine.cpp">
<Filter>HW</Filter>
</ClCompile>
<ClCompile Include="HW\StereoResampler.cpp">
<Filter>HW</Filter>
</ClCompile>
<ClCompile Include="Util\PPGeDraw.cpp">
<Filter>Util</Filter>
</ClCompile>
@ -775,6 +778,9 @@
<ClInclude Include="HW\MediaEngine.h">
<Filter>HW</Filter>
</ClInclude>
<ClInclude Include="HW\StereoResampler.h">
<Filter>HW</Filter>
</ClInclude>
<ClInclude Include="Util\PPGeDraw.h">
<Filter>Util</Filter>
</ClInclude>

View file

@ -35,7 +35,9 @@
#include "Core/HLE/sceAudio.h"
#include "Core/HLE/sceKernel.h"
#include "Core/HLE/sceKernelThread.h"
#include "Core/HW/StereoResampler.h"
StereoResampler resampler;
// Should be used to lock anything related to the outAudioQueue.
// atomic locks are used on the lock. TODO: make this lock-free
@ -67,14 +69,6 @@ static s32 *mixBuffer;
static int chanQueueMaxSizeFactor;
static int chanQueueMinSizeFactor;
// TODO: Need to replace this with something lockless. Mutexes in the audio pipeline
// is bad mojo.
FixedSizeQueue<s16, 512 * 16> outAudioQueue;
bool __gainAudioQueueLock();
void __releaseAcquiredLock();
void __blockForAudioQueueLock();
static inline s16 adjustvolume(s16 sample, int vol) {
#ifdef ARM
register int r;
@ -181,9 +175,7 @@ void __AudioInit() {
mixBuffer = new s32[hwBlockSize * 2];
memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32));
__blockForAudioQueueLock();
outAudioQueue.clear();
__releaseAcquiredLock();
resampler.Clear();
CoreTiming::RegisterMHzChangeCallback(&__AudioCPUMHzChange);
}
@ -199,16 +191,14 @@ void __AudioDoState(PointerWrap &p) {
p.Do(mixFrequency);
{
//block until a lock is achieved. Not a good idea at all, but
//can't think of a better one...
__blockForAudioQueueLock();
if (s >= 2) {
resampler.DoState(p);
} else {
// Only to preserve the previous file format. Might cause a slight audio glitch on upgrades?
FixedSizeQueue<s16, 512 * 16> outAudioQueue;
outAudioQueue.DoState(p);
//release the atomic lock
__releaseAcquiredLock();
resampler.Clear();
}
int chanCount = ARRAY_SIZE(chans);
@ -358,28 +348,6 @@ void __AudioSetOutputFrequency(int freq) {
mixFrequency = freq;
}
// Saturates `size` 32-bit samples from `in` into 16-bit samples in `out`.
// When SSE2 is available, eight samples at a time are packed with signed
// saturation; any remainder (fewer than eight) goes through the scalar clamp.
inline void ClampBufferToS16(s16 *out, s32 *in, size_t size) {
	size_t done = 0;
#ifdef _M_SSE
	for (; size - done >= 8; done += 8) {
		const __m128i lower = _mm_loadu_si128((__m128i *)(in + done));
		const __m128i upper = _mm_loadu_si128((__m128i *)(in + done + 4));
		_mm_storeu_si128((__m128i *)(out + done), _mm_packs_epi32(lower, upper));
	}
#endif
	// Scalar path: the whole buffer without SSE, otherwise just the tail.
	for (; done < size; ++done) {
		out[done] = clamp_s16(in[done]);
	}
}
// Mix samples from the various audio channels into a single sample queue.
// This single sample queue is where __AudioMix should read from. If the sample queue is full, we should
// just sleep the main emulator thread a little.
@ -433,35 +401,17 @@ void __AudioUpdate() {
}
if (g_Config.bEnableSound) {
__blockForAudioQueueLock();
/*
if (!__gainAudioQueueLock()){
return;
}
*/
if (outAudioQueue.room() >= hwBlockSize * 2) {
s16 *buf1 = 0, *buf2 = 0;
size_t sz1, sz2;
outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2);
ClampBufferToS16(buf1, mixBuffer, sz1);
if (buf2) {
ClampBufferToS16(buf2, mixBuffer + sz1, sz2);
}
} else {
// This happens quite a lot. There's still something slightly off
// about the amount of audio we produce.
}
//release the atomic lock
__releaseAcquiredLock();
resampler.PushSamples(mixBuffer, hwBlockSize);
}
}
// numFrames is number of stereo frames.
// This is called from *outside* the emulator thread.
int __AudioMix(short *outstereo, int numFrames)
{
int __AudioMix(short *outstereo, int numFrames, int sampleRate) {
resampler.Mix(outstereo, numFrames, false, sampleRate);
return numFrames;
/*
// TODO: if mixFrequency != the actual output frequency, resample!
int underrun = -1;
s16 sampleL = 0;
@ -476,6 +426,7 @@ int __AudioMix(short *outstereo, int numFrames)
return 0;
}
resampler.Mix(outstereo, numFrames);
outAudioQueue.popPointers(numFrames * 2, &buf1, &sz1, &buf2, &sz2);
memcpy(outstereo, buf1, sz1 * sizeof(s16));
@ -496,40 +447,5 @@ int __AudioMix(short *outstereo, int numFrames)
VERBOSE_LOG(SCEAUDIO, "Audio out buffer UNDERRUN at %i of %i", underrun, numFrames);
}
return underrun >= 0 ? underrun : numFrames;
}
/*returns whether the lock was successfully gained or not.
i.e - whether the lock belongs to you
*/
inline bool __gainAudioQueueLock(){
if (g_Config.bAtomicAudioLocks){
/*if the previous state was 0, that means the lock was "unlocked". So,
we return !0, which is true thanks to C's int to bool conversion
One the other hand, if it was locked, then the lock would return 1.
so, !1 = 0 = false.
*/
return atomicLock_.test_and_set() == 0;
} else {
mutex_.lock();
return true;
}
};
inline void __releaseAcquiredLock(){
if (g_Config.bAtomicAudioLocks){
atomicLock_.clear();
} else {
mutex_.unlock();
}
}
inline void __blockForAudioQueueLock(){
if (g_Config.bAtomicAudioLocks){
while ((atomicLock_.test_and_set() == 0)){ }
} else {
mutex_.lock();
}
*/
}

View file

@ -32,4 +32,4 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking);
void __AudioWakeThreads(AudioChannel &chan, int result, int step);
void __AudioWakeThreads(AudioChannel &chan, int result);
int __AudioMix(short *outstereo, int numSamples);
int __AudioMix(short *outstereo, int numSamples, int sampleRate);

202
Core/HW/StereoResampler.cpp Normal file
View file

@ -0,0 +1,202 @@
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// Adapted from Dolphin.
#include <string.h>
#include "base/logging.h"
#include "Common/ChunkFile.h"
#include "Common/MathUtil.h"
#include "Common/Atomics.h"
#include "Core/HW/StereoResampler.h"
#include "Globals.h"
#ifdef _M_SSE
#include <emmintrin.h>
#endif
// Saturates `size` 32-bit samples from `in` into 16-bit samples in `out`.
// When SSE2 is available, eight samples at a time are packed with signed
// saturation; any remainder (fewer than eight) goes through the scalar clamp.
// Unaligned loads/stores are used, so neither pointer needs special alignment.
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) {
	size_t done = 0;
#ifdef _M_SSE
	for (; size - done >= 8; done += 8) {
		const __m128i lower = _mm_loadu_si128((const __m128i *)(in + done));
		const __m128i upper = _mm_loadu_si128((const __m128i *)(in + done + 4));
		_mm_storeu_si128((__m128i *)(out + done), _mm_packs_epi32(lower, upper));
	}
#endif
	// Scalar path: the whole buffer without SSE, otherwise just the tail.
	for (; done < size; ++done) {
		out[done] = clamp_s16(in[done]);
	}
}
void StereoResampler::MixerFifo::Clear() {
// TODO
}
// Executed from sound stream thread
unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
unsigned int currentSample = 0;
// Cache access in non-volatile variable
// This is the only function changing the read value, so it's safe to
// cache it locally although it's written here.
// The writing pointer will be modified outside, but it will only increase,
// so we will just ignore new written data while interpolating.
// Without this cache, the compiler wouldn't be allowed to optimize the
// interpolation loop.
u32 indexR = Common::AtomicLoad(m_indexR);
u32 indexW = Common::AtomicLoad(m_indexW);
float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG;
float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR;
if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
// render numleft sample pairs to samples[]
// advance indexR with sample position
// remember fractional offset
float aid_sample_rate = m_input_sample_rate + offset;
/*
u32 framelimit = SConfig::GetInstance().m_Framelimit;
if (consider_framelimit && framelimit > 1) {
aid_sample_rate = aid_sample_rate * (framelimit - 1) * 5 / 59.994;
}*/
const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)sample_rate);
s32 lvolume = m_LVolume;
s32 rvolume = m_RVolume;
// TODO: consider a higher-quality resampling algorithm.
// TODO: Add a fast path for 1:1.
for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
u32 indexR2 = indexR + 2; //next sample
s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
sampleL = (sampleL * lvolume) >> 8;
sampleL += samples[currentSample + 1];
MathUtil::Clamp(&sampleL, -32767, 32767);
samples[currentSample + 1] = sampleL;
s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
sampleR = (sampleR * rvolume) >> 8;
sampleR += samples[currentSample];
MathUtil::Clamp(&sampleR, -32767, 32767);
samples[currentSample] = sampleR;
m_frac += ratio;
indexR += 2 * (u16)(m_frac >> 16);
m_frac &= 0xffff;
}
// Padding with the last value to reduce clicking
short s[2];
s[0] = m_buffer[(indexR - 1) & INDEX_MASK];
s[1] = m_buffer[(indexR - 2) & INDEX_MASK];
s[0] = (s[0] * rvolume) >> 8;
s[1] = (s[1] * lvolume) >> 8;
for (; currentSample < numSamples * 2; currentSample += 2) {
int sampleR = s[0] + samples[currentSample];
MathUtil::Clamp(&sampleR, -32767, 32767);
samples[currentSample] = sampleR;
int sampleL = s[1] + samples[currentSample + 1];
MathUtil::Clamp(&sampleL, -32767, 32767);
samples[currentSample + 1] = sampleL;
}
// Flush cached variable
Common::AtomicStore(m_indexR, indexR);
return numSamples;
}
// Audio-thread entry point. Fills `samples` (num_samples interleaved stereo
// frames) with resampled output and returns what the FIFO mixer reports.
// Returns 0 without touching anything when `samples` is null.
unsigned int StereoResampler::Mix(short* samples, unsigned int num_samples, bool consider_framelimit, int sample_rate) {
	if (samples == nullptr) {
		return 0;
	}

	lock_guard lk(m_csMixing);

	// The FIFO mixer accumulates into the buffer, so start from silence.
	const size_t stereoBytes = num_samples * 2 * sizeof(short);
	memset(samples, 0, stereoBytes);

	const unsigned int mixed = m_dma_mixer.Mix(samples, num_samples, consider_framelimit, sample_rate);
	return mixed;
}
void StereoResampler::MixerFifo::PushSamples(const s32 *samples, unsigned int num_samples) {
// Cache access in non-volatile variable
// indexR isn't allowed to cache in the audio throttling loop as it
// needs to get updates to not deadlock.
u32 indexW = Common::AtomicLoad(m_indexW);
// Check if we have enough free space
// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2)
return;
// AyuanX: Actual re-sampling work has been moved to sound thread
// to alleviate the workload on main thread
// and we simply store raw data here to make fast mem copy
int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short);
if (over_bytes > 0) {
ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, (num_samples * 4 - over_bytes) / 2);
ClampBufferToS16(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes / 2);
} else {
ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 2);
}
Common::AtomicAdd(m_indexW, num_samples * 2);
return;
}
void StereoResampler::PushSamples(const int *samples, unsigned int num_samples) {
m_dma_mixer.PushSamples(samples, num_samples);
}
void StereoResampler::SetDMAInputSampleRate(unsigned int rate) {
m_dma_mixer.SetInputSampleRate(rate);
}
void StereoResampler::MixerFifo::SetInputSampleRate(unsigned int rate) {
m_input_sample_rate = rate;
}
// Sets the per-channel gain used by Mix(), where 256 means unity.
// Adding (v >> 7) stretches an input of 255 to exactly 256.
// NOTE(review): presumably callers pass 0-255 - confirm.
void StereoResampler::MixerFifo::SetVolume(unsigned int lvolume, unsigned int rvolume) {
	const unsigned int leftBoost = lvolume >> 7;
	const unsigned int rightBoost = rvolume >> 7;
	m_LVolume = lvolume + leftBoost;
	m_RVolume = rvolume + rightBoost;
}
// Savestate hook. Opens the "resampler" section (version 1) and bails out if
// that fails; no resampler fields are serialized inside it at this point.
void StereoResampler::DoState(PointerWrap &p) {
	auto section = p.Section("resampler", 1);
	if (!section) {
		return;
	}
}

110
Core/HW/StereoResampler.h Normal file
View file

@ -0,0 +1,110 @@
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// Adapted from Dolphin.
#pragma once
#include <string>
#include "base/mutex.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
// Tuning constants for the 16 bit stereo FIFO.

// FIFO capacity in stereo frames; the backing array holds MAX_SAMPLES * 2 shorts.
// 2048 frames is 64 ms at 32000 Hz (about 46 ms at the 44100 Hz the FIFO is constructed with).
#define MAX_SAMPLES (1024 * 2)
// Wraps a running index into the backing array (relies on MAX_SAMPLES * 2 being a power of two).
#define INDEX_MASK (MAX_SAMPLES * 2 - 1)
// Fill level, in frames, that the rate control steers towards: 40 ms at 32000 Hz (about 29 ms at 44100 Hz).
#define LOW_WATERMARK 1280
// Largest correction, in Hz, applied to the input rate. The original note says "per 32000 Hz";
// NOTE(review): the value is applied unscaled regardless of input rate - confirm intended.
#define MAX_FREQ_SHIFT 200
// Rate correction in freq_shift per frame of distance from LOW_WATERMARK.
#define CONTROL_FACTOR 0.2f
// Number of Mix() calls the fill-level average is smoothed over.
#define CONTROL_AVG 32
// Ring-buffer based stereo resampler, adapted from Dolphin's mixer.
// The main (emulator) thread feeds 32-bit mixed frames in with PushSamples();
// the host audio thread pulls 16-bit output at its own rate with Mix().
class StereoResampler {
public:
	StereoResampler()
		: m_dma_mixer(this, 44100)
		// Previously left uninitialized although GetSampleRate() returns it.
		// NOTE(review): nothing visible here assigns it later; 44100 mirrors
		// the FIFO's default input rate - confirm the intended meaning.
		, m_sampleRate(44100)
		, m_speed(1.0)
	{
	}

	virtual ~StereoResampler() {}

	// Called from audio threads
	// Writes numSamples interleaved stereo frames to `samples`, resampled to sampleRate.
	virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sampleRate);

	// Called from main thread
	// This clamps the samples to 16-bit before starting to work on them.
	virtual void PushSamples(const s32* samples, unsigned int num_samples);
	unsigned int GetSampleRate() const { return m_sampleRate; }

	// Sets the rate (Hz) of the audio passed to PushSamples().
	void SetDMAInputSampleRate(unsigned int rate);

	// Mutex held by Mix() for the duration of a mixing pass.
	recursive_mutex& MixerCritical() { return m_csMixing; }

	float GetCurrentSpeed() const { return m_speed; }
	void UpdateSpeed(volatile float val) { m_speed = val; }

	void Clear() {
		m_dma_mixer.Clear();
	}

	void DoState(PointerWrap &p);

protected:
	// Single-producer / single-consumer FIFO of interleaved 16-bit stereo
	// samples, with the resampling read side implemented in Mix().
	class MixerFifo {
	public:
		MixerFifo(StereoResampler *mixer, unsigned sample_rate)
			: m_mixer(mixer)
			, m_input_sample_rate(sample_rate)
			, m_indexW(0)
			, m_indexR(0)
			, m_LVolume(256)
			, m_RVolume(256)
			, m_numLeftI(0.0f)
			, m_frac(0)
		{
			memset(m_buffer, 0, sizeof(m_buffer));
		}
		void PushSamples(const s32* samples, unsigned int num_samples);
		unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate);
		void SetInputSampleRate(unsigned int rate);
		void SetVolume(unsigned int lvolume, unsigned int rvolume);
		void Clear();

	private:
		StereoResampler *m_mixer;
		unsigned m_input_sample_rate;
		short m_buffer[MAX_SAMPLES * 2];
		// Free-running indices into m_buffer (masked with INDEX_MASK on use).
		// m_indexW is advanced by PushSamples(), m_indexR by Mix().
		volatile u32 m_indexW;
		volatile u32 m_indexR;
		// Volume ranges from 0-256
		volatile s32 m_LVolume;
		volatile s32 m_RVolume;
		float m_numLeftI;  // smoothed count of queued frames, drives the rate control
		u32 m_frac;        // 16-bit fractional read position carried between Mix() calls
	};

	MixerFifo m_dma_mixer;
	unsigned int m_sampleRate;

	recursive_mutex m_csMixing;

	volatile float m_speed; // Current rate of the emulation (1.0 = 100% speed)
};

View file

@ -226,7 +226,8 @@ std::string NativeQueryConfig(std::string query) {
int NativeMix(short *audio, int num_samples) {
if (GetUIState() == UISTATE_INGAME) {
num_samples = __AudioMix(audio, num_samples);
int sample_rate = System_GetPropertyInt(SYSPROP_AUDIO_SAMPLE_RATE);
num_samples = __AudioMix(audio, num_samples, sample_rate > 0 ? sample_rate : 44100);
} else {
MixBackgroundAudio(audio, num_samples);
}
@ -568,7 +569,7 @@ void NativeInitGraphics() {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
#ifdef _WIN32
DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 44100);
DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 48000);
#endif
}