Add a NEON method to apply the volume.

This commit is contained in:
Unknown W. Brackets 2015-01-11 13:50:52 -08:00
parent ff4db2a8dd
commit db898aff23
11 changed files with 253 additions and 52 deletions

View file

@ -1071,6 +1071,10 @@ else()
ext/disarm.cpp)
endif()
if(ARMV7)
set(CORE_NEON Core/Util/AudioFormatNEON.cpp Core/Util/AudioFormatNEON.h)
endif()
# 'ppsspp_jni' on ANDROID, 'Core' everywhere else
# SHARED on ANDROID, STATIC everywhere else
add_library(${CoreLibName} ${CoreLinkType}
@ -1324,6 +1328,8 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/Screenshot.h
Core/System.cpp
Core/System.h
Core/Util/AudioFormat.cpp
Core/Util/AudioFormat.h
Core/Util/GameManager.cpp
Core/Util/GameManager.h
Core/Util/BlockAllocator.cpp
@ -1332,6 +1338,7 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/Util/PPGeDraw.h
Core/Util/ppge_atlas.cpp
Core/Util/ppge_atlas.h
${CORE_NEON}
$<TARGET_OBJECTS:GPU>
Globals.h
git-version.cpp)

View file

@ -375,6 +375,13 @@
<ClCompile Include="MIPS\MIPSStackWalk.cpp" />
<ClCompile Include="Screenshot.cpp" />
<ClCompile Include="System.cpp" />
<ClCompile Include="Util\AudioFormat.cpp" />
<ClCompile Include="Util\AudioFormatNEON.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="Util\BlockAllocator.cpp" />
<ClCompile Include="Util\GameManager.cpp" />
<ClCompile Include="Util\PPGeDraw.cpp" />
@ -569,6 +576,13 @@
<ClInclude Include="Screenshot.h" />
<ClInclude Include="System.h" />
<ClInclude Include="ThreadEventQueue.h" />
<ClInclude Include="Util\AudioFormat.h" />
<ClInclude Include="Util\AudioFormatNEON.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="Util\BlockAllocator.h" />
<ClInclude Include="Util\GameManager.h" />
<ClInclude Include="Util\PPGeDraw.h" />

View file

@ -533,6 +533,12 @@
<Filter>HLE\Libraries</Filter>
</ClCompile>
<ClCompile Include="Screenshot.cpp" />
<ClCompile Include="Util\AudioFormatNEON.cpp">
<Filter>Util</Filter>
</ClCompile>
<ClCompile Include="Util\AudioFormat.cpp">
<Filter>Util</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ELF\ElfReader.h">
@ -1007,6 +1013,12 @@
<Filter>HLE\Libraries</Filter>
</ClInclude>
<ClInclude Include="Screenshot.h" />
<ClInclude Include="Util\AudioFormat.h">
<Filter>Util</Filter>
</ClInclude>
<ClInclude Include="Util\AudioFormatNEON.h">
<Filter>Util</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
@ -1014,4 +1026,4 @@
<None Include="..\android\jni\Android.mk" />
<None Include="GameLogNotes.txt" />
</ItemGroup>
</Project>
</Project>

View file

@ -37,6 +37,7 @@
#include "Core/HLE/sceKernel.h"
#include "Core/HLE/sceKernelThread.h"
#include "Core/HW/StereoResampler.h"
#include "Core/Util/AudioFormat.h"
StereoResampler resampler;
@ -70,53 +71,6 @@ static s32 *mixBuffer;
static int chanQueueMaxSizeFactor;
static int chanQueueMinSizeFactor;
static inline s16 adjustvolume(s16 sample, int vol) {
#ifdef ARM
register int r;
asm volatile("smulwb %0, %1, %2\n\t" \
"ssat %0, #16, %0" \
: "=r"(r) : "r"(vol), "r"(sample));
return r;
#else
return clamp_s16((sample * vol) >> 16);
#endif
}
inline void AdjustVolumeBlock(s16 *out, s16 *in, size_t size, int leftVol, int rightVol) {
#ifdef _M_SSE
if (leftVol <= 0x7fff && rightVol <= 0x7fff) {
__m128i volume = _mm_set_epi16(leftVol, rightVol, leftVol, rightVol, leftVol, rightVol, leftVol, rightVol);
while (size >= 16) {
__m128i indata1 = _mm_loadu_si128((__m128i *)in);
__m128i indata2 = _mm_loadu_si128((__m128i *)(in + 8));
_mm_storeu_si128((__m128i *)out, _mm_mulhi_epi16(indata1, volume));
_mm_storeu_si128((__m128i *)(out + 8), _mm_mulhi_epi16(indata2, volume));
in += 16;
out += 16;
size -= 16;
}
} else {
// We have to shift inside the loop to avoid the signed multiply issue.
leftVol >>= 1;
rightVol >>= 1;
__m128i volume = _mm_set_epi16(leftVol, rightVol, leftVol, rightVol, leftVol, rightVol, leftVol, rightVol);
while (size >= 16) {
__m128i indata1 = _mm_loadu_si128((__m128i *)in);
__m128i indata2 = _mm_loadu_si128((__m128i *)(in + 8));
_mm_storeu_si128((__m128i *)out, _mm_slli_epi16(_mm_mulhi_epi16(indata1, volume), 1));
_mm_storeu_si128((__m128i *)(out + 8), _mm_slli_epi16(_mm_mulhi_epi16(indata2, volume), 1));
in += 16;
out += 16;
size -= 16;
}
}
#endif
for (size_t i = 0; i < size; i += 2) {
out[i] = adjustvolume(in[i], leftVol);
out[i + 1] = adjustvolume(in[i + 1], rightVol);
}
}
static void hleAudioUpdate(u64 userdata, int cyclesLate) {
// Schedule the next cycle first. __AudioUpdate() may consume cycles.
CoreTiming::ScheduleEvent(audioIntervalCycles - cyclesLate, eventAudioUpdate, 0);
@ -305,8 +259,8 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking) {
// Rare, so unoptimized. Expands to stereo.
for (u32 i = 0; i < chan.sampleCount; i++) {
s16 sample = (s16)Memory::Read_U16(chan.sampleAddress + 2 * i);
chan.sampleQueue.push(adjustvolume(sample, leftVol));
chan.sampleQueue.push(adjustvolume(sample, rightVol));
chan.sampleQueue.push(ApplySampleVolume(sample, leftVol));
chan.sampleQueue.push(ApplySampleVolume(sample, rightVol));
}
}
}

76
Core/Util/AudioFormat.cpp Normal file
View file

@ -0,0 +1,76 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Common/Common.h"
#include "Core/Util/AudioFormat.h"
#include "Core/Util/AudioFormatNEON.h"
#include "Globals.h"
#ifdef _M_SSE
#include <xmmintrin.h>
#endif
void AdjustVolumeBlockStandard(s16 *out, s16 *in, size_t size, int leftVol, int rightVol) {
#ifdef _M_SSE
if (leftVol <= 0x7fff && rightVol <= 0x7fff) {
__m128i volume = _mm_set_epi16(leftVol, rightVol, leftVol, rightVol, leftVol, rightVol, leftVol, rightVol);
while (size >= 16) {
__m128i indata1 = _mm_loadu_si128((__m128i *)in);
__m128i indata2 = _mm_loadu_si128((__m128i *)(in + 8));
_mm_storeu_si128((__m128i *)out, _mm_mulhi_epi16(indata1, volume));
_mm_storeu_si128((__m128i *)(out + 8), _mm_mulhi_epi16(indata2, volume));
in += 16;
out += 16;
size -= 16;
}
} else {
// We have to shift inside the loop to avoid the signed multiply issue.
leftVol >>= 1;
rightVol >>= 1;
__m128i volume = _mm_set_epi16(leftVol, rightVol, leftVol, rightVol, leftVol, rightVol, leftVol, rightVol);
while (size >= 16) {
__m128i indata1 = _mm_loadu_si128((__m128i *)in);
__m128i indata2 = _mm_loadu_si128((__m128i *)(in + 8));
_mm_storeu_si128((__m128i *)out, _mm_slli_epi16(_mm_mulhi_epi16(indata1, volume), 1));
_mm_storeu_si128((__m128i *)(out + 8), _mm_slli_epi16(_mm_mulhi_epi16(indata2, volume), 1));
in += 16;
out += 16;
size -= 16;
}
}
#endif
for (size_t i = 0; i < size; i += 2) {
out[i] = ApplySampleVolume(in[i], leftVol);
out[i + 1] = ApplySampleVolume(in[i + 1], rightVol);
}
}
#ifndef _M_SSE
AdjustVolumeBlockFunc AdjustVolumeBlock = &AdjustVolumeBlockStandard;
// This has to be done after CPUDetect has done its magic.
void SetupAudioFormats() {
#ifdef HAVE_ARMV7
if (cpu_info.bNEON) {
AdjustVolumeBlock = &AdjustVolumeBlockNEON;
}
#endif
}
#else
void SetupAudioFormats() {
}
#endif

43
Core/Util/AudioFormat.h Normal file
View file

@ -0,0 +1,43 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Common/Common.h"
#include "Globals.h"
static inline s16 ApplySampleVolume(s16 sample, int vol) {
#ifdef ARM
register int r;
asm volatile("smulwb %0, %1, %2\n\t" \
"ssat %0, #16, %0" \
: "=r"(r) : "r"(vol), "r"(sample));
return r;
#else
return clamp_s16((sample * vol) >> 16);
#endif
}
void SetupAudioFormats();
void AdjustVolumeBlockStandard(s16 *out, s16 *in, size_t size, int leftVol, int rightVol);
#ifdef _M_SSE
#define AdjustVolumeBlock AdjustVolumeBlockStandard
#else
typedef void (*AdjustVolumeBlockFunc)(s16 *out, s16 *in, size_t size, int leftVol, int rightVol);
extern AdjustVolumeBlockFunc AdjustVolumeBlock;
#endif

View file

@ -0,0 +1,59 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <arm_neon.h>
#include "Common/Common.h"
#include "Core/Util/AudioFormat.h"
#include "Core/Util/AudioFormatNEON.h"
#ifndef ARM
#error Should not be compiled on non-ARM.
#endif
static s16 MEMORY_ALIGNED16(volumeValues[4]) = {};
void AdjustVolumeBlockNEON(s16 *out, s16 *in, size_t size, int leftVol, int rightVol) {
volumeValues[0] = leftVol >> 1;
volumeValues[1] = rightVol >> 1;
volumeValues[2] = leftVol >> 1;
volumeValues[3] = rightVol >> 1;
const int16x4_t vol = vld1_s16(volumeValues);
while (size >= 16) {
int16x8_t indata1 = vld1q_s16(in);
int16x8_t indata2 = vld1q_s16(in + 8);
int32x4_t outh1 = vmull_s16(vget_high_s16(indata1), vol);
int32x4_t outh2 = vmull_s16(vget_high_s16(indata2), vol);
int32x4_t outl1 = vmull_s16(vget_low_s16(indata1), vol);
int32x4_t outl2 = vmull_s16(vget_low_s16(indata2), vol);
int16x8_t outdata1 = vcombine_s16(vqshrn_n_s32(outl1, 15), vqshrn_n_s32(outh1, 15));
int16x8_t outdata2 = vcombine_s16(vqshrn_n_s32(outl2, 15), vqshrn_n_s32(outh2, 15));
vst1q_s16(out, outdata1);
vst1q_s16(out + 8, outdata2);
in += 16;
out += 16;
size -= 16;
}
for (size_t i = 0; i < size; i += 2) {
out[i] = ApplySampleVolume(in[i], leftVol);
out[i + 1] = ApplySampleVolume(in[i + 1], rightVol);
}
}

View file

@ -0,0 +1,22 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Common/CommonTypes.h"
void AdjustVolumeBlockNEON(s16 *out, s16 *in, size_t size, int leftVol, int rightVol);

View file

@ -32,7 +32,11 @@ SOURCES += $$P/Core/*.cpp \ # Core
$$P/Core/HW/*.cpp \
$$P/Core/MIPS/*.cpp \
$$P/Core/MIPS/JitCommon/*.cpp \
$$P/Core/Util/*.cpp \
$$P/Core/Util/AudioFormat.cpp \
$$P/Core/Util/BlockAllocator.cpp \
$$P/Core/Util/GameManager.cpp \
$$P/Core/Util/ppge_atlas.cpp \
$$P/Core/Util/PPGeDraw.cpp \
$$P/ext/libkirk/*.c # Kirk
HEADERS += $$P/Core/*.h \
@ -45,7 +49,13 @@ HEADERS += $$P/Core/*.h \
$$P/Core/HW/*.h \
$$P/Core/MIPS/*.h \
$$P/Core/MIPS/JitCommon/*.h \
$$P/Core/Util/*.h \
$$P/Core/Util/AudioFormat.h \
$$P/Core/Util/BlockAllocator.h \
$$P/Core/Util/GameManager.h \
$$P/Core/Util/ppge_atlas.h \
$$P/Core/Util/PPGeDraw.h \
$$P/ext/libkirk/*.h
armv7: SOURCES += $$P/Core/Util/AudioFormatNEON.cpp
win32: INCLUDEPATH += $$P/ffmpeg/WindowsInclude

View file

@ -77,6 +77,7 @@
#include "Core/HLE/__sceAudio.h"
#include "Core/HLE/sceCtrl.h"
#include "Core/Util/GameManager.h"
#include "Core/Util/AudioFormat.h"
#include "ui_atlas.h"
#include "EmuScreen.h"
@ -266,6 +267,7 @@ void NativeInit(int argc, const char *argv[],
#endif
InitFastMath(cpu_info.bNEON);
SetupAudioFormats();
// Sets both FZ and DefaultNaN on ARM, flipping some ARM implementations into "RunFast" mode for VFP.
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html

View file

@ -55,6 +55,7 @@ endif
ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
ARCH_FILES := \
$(SRC)/GPU/Common/TextureDecoderNEON.cpp.neon \
$(SRC)/Core/Util/AudioFormatNEON.cpp.neon \
$(SRC)/Common/ArmEmitter.cpp \
$(SRC)/Common/ArmCPUDetect.cpp \
$(SRC)/Common/ArmThunk.cpp \
@ -286,6 +287,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/Core/FileSystems/tlzrc.cpp \
$(SRC)/Core/MIPS/JitCommon/JitCommon.cpp \
$(SRC)/Core/MIPS/JitCommon/JitBlockCache.cpp \
$(SRC)/Core/Util/AudioFormat.cpp \
$(SRC)/Core/Util/GameManager.cpp \
$(SRC)/Core/Util/BlockAllocator.cpp \
$(SRC)/Core/Util/ppge_atlas.cpp \