diff --git a/Core/MIPS/JitCommon/JitBlockCache.cpp b/Core/MIPS/JitCommon/JitBlockCache.cpp index 1a26688455..00e122c583 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.cpp +++ b/Core/MIPS/JitCommon/JitBlockCache.cpp @@ -83,7 +83,7 @@ JitBlockCache::~JitBlockCache() { Shutdown(); } -bool JitBlock::ContainsAddress(u32 em_address) { +bool JitBlock::ContainsAddress(u32 em_address) const { // WARNING - THIS DOES NOT WORK WITH JIT INLINING ENABLED. // However, that doesn't exist yet so meh. return (em_address >= originalAddress && em_address < originalAddress + 4 * originalSize); diff --git a/Core/MIPS/JitCommon/JitBlockCache.h b/Core/MIPS/JitCommon/JitBlockCache.h index 71b7df9660..3049300f9a 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.h +++ b/Core/MIPS/JitCommon/JitBlockCache.h @@ -59,7 +59,7 @@ enum class DestroyType { // We should be careful not to access these block structures during runtime as they are large. // Fine to mess with them at block compile time though. struct JitBlock { - bool ContainsAddress(u32 em_address); + bool ContainsAddress(u32 em_address) const; const u8 *checkedEntry; // const, we have to translate to writable. const u8 *normalEntry; diff --git a/Core/MIPS/MIPSVFPUUtils.cpp b/Core/MIPS/MIPSVFPUUtils.cpp index d487815eab..c72c107056 100644 --- a/Core/MIPS/MIPSVFPUUtils.cpp +++ b/Core/MIPS/MIPSVFPUUtils.cpp @@ -165,68 +165,58 @@ void GetMatrixRows(int matrixReg, MatrixSize msize, u8 vecs[4]) { } void ReadVector(float *rd, VectorSize size, int reg) { - int row = 0; - int length = 0; - + int row; + int length; switch (size) { case V_Single: rd[0] = V(reg); return; // transpose = 0; row=(reg>>5)&3; length = 1; break; case V_Pair: row=(reg>>5)&2; length = 2; break; case V_Triple: row=(reg>>6)&1; length = 3; break; case V_Quad: row=(reg>>5)&2; length = 4; break; - default: _assert_msg_(false, "%s: Bad vector size", __FUNCTION__); + default: length = 0; break; } - int transpose = (reg>>5) & 1; - const int mtx = (reg >> 2) & 7; + int transpose = (reg >> 5) & 1; + const int mtx = reg & (7 << 2); const int col = reg & 3; - if (transpose) { - const int base = mtx * 4 + col * 32; + const int base = mtx + col * 32; for (int i = 0; i < length; i++) rd[i] = V(base + ((row+i)&3)); } else { - const int base = mtx * 4 + col; + const int base = mtx + col; for (int i = 0; i < length; i++) rd[i] = V(base + ((row+i)&3)*32); } } void WriteVector(const float *rd, VectorSize size, int reg) { - if (size == V_Single) { - // Optimize the common case. - if (!currentMIPS->VfpuWriteMask(0)) { - V(reg) = rd[0]; - } - return; - } - - const int mtx = (reg>>2)&7; - const int col = reg & 3; - int transpose = (reg>>5)&1; - int row = 0; - int length = 0; + int row; + int length; switch (size) { - case V_Single: _dbg_assert_(false); return; // transpose = 0; row=(reg>>5)&3; length = 1; break; + case V_Single: if (!currentMIPS->VfpuWriteMask(0)) V(reg) = rd[0]; return; // transpose = 0; row=(reg>>5)&3; length = 1; break; case V_Pair: row=(reg>>5)&2; length = 2; break; case V_Triple: row=(reg>>6)&1; length = 3; break; case V_Quad: row=(reg>>5)&2; length = 4; break; - default: _assert_msg_(false, "%s: Bad vector size", __FUNCTION__); + default: length = 0; break; } + const int mtx = reg & (7 << 2); + const int col = reg & 3; + bool transpose = (reg >> 5) & 1; if (currentMIPS->VfpuWriteMask() == 0) { if (transpose) { - const int base = mtx * 4 + col * 32; + const int base = mtx + col * 32; for (int i = 0; i < length; i++) V(base + ((row+i)&3)) = rd[i]; } else { - const int base = mtx * 4 + col; + const int base = mtx + col; for (int i = 0; i < length; i++) V(base + ((row+i)&3)*32) = rd[i]; } } else { for (int i = 0; i < length; i++) { if (!currentMIPS->VfpuWriteMask(i)) { - int index = mtx * 4; + int index = mtx; if (transpose) index += ((row+i)&3) + col*32; else @@ -243,9 +233,6 @@ u32 VFPURewritePrefix(int ctrl, u32 remove, u32 add) { } void ReadMatrix(float *rd, MatrixSize size, int reg) { - int mtx = (reg >> 2) & 7; - int col = reg & 3; - int row = 0; int side = 0; int transpose = (reg >> 5) & 1; @@ -255,9 +242,12 @@ void ReadMatrix(float *rd, MatrixSize size, int reg) { case M_2x2: row = (reg >> 5) & 2; side = 2; break; case M_3x3: row = (reg >> 6) & 1; side = 3; break; case M_4x4: row = (reg >> 5) & 2; side = 4; break; - default: _assert_msg_(false, "%s: Bad matrix size", __FUNCTION__); + default: side = 0; break; } + int mtx = (reg >> 2) & 7; + int col = reg & 3; + // The voffset ordering is now integrated in these formulas, // eliminating a table lookup. const float *v = currentMIPS->v + (size_t)mtx * 16; @@ -296,8 +286,8 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) { int mtx = (reg>>2)&7; int col = reg&3; - int row = 0; - int side = 0; + int row; + int side; int transpose = (reg >> 5) & 1; switch (size) { @@ -305,7 +295,7 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) { case M_2x2: row = (reg >> 5) & 2; side = 2; break; case M_3x3: row = (reg >> 6) & 1; side = 3; break; case M_4x4: row = (reg >> 5) & 2; side = 4; break; - default: _assert_msg_(false, "%s: Bad matrix size", __FUNCTION__); + default: side = 0; } if (currentMIPS->VfpuWriteMask() != 0) { @@ -370,16 +360,6 @@ int GetVectorOverlap(int vec1, VectorSize size1, int vec2, VectorSize size2) { return count; } -int GetNumVectorElements(VectorSize sz) { - switch (sz) { - case V_Single: return 1; - case V_Pair: return 2; - case V_Triple: return 3; - case V_Quad: return 4; - default: return 0; - } -} - VectorSize GetHalfVectorSizeSafe(VectorSize sz) { switch (sz) { case V_Pair: return V_Single; diff --git a/Core/MIPS/MIPSVFPUUtils.h b/Core/MIPS/MIPSVFPUUtils.h index edd02d687d..6e2bc5d056 100644 --- a/Core/MIPS/MIPSVFPUUtils.h +++ b/Core/MIPS/MIPSVFPUUtils.h @@ -218,7 +218,17 @@ VectorSize GetDoubleVectorSizeSafe(VectorSize sz); VectorSize GetDoubleVectorSize(VectorSize sz); VectorSize MatrixVectorSizeSafe(MatrixSize sz); VectorSize MatrixVectorSize(MatrixSize sz); -int GetNumVectorElements(VectorSize sz); + +inline int GetNumVectorElements(VectorSize sz) { + switch (sz) { + case V_Single: return 1; + case V_Pair: return 2; + case V_Triple: return 3; + case V_Quad: return 4; + default: return 0; + } +} + int GetMatrixSideSafe(MatrixSize sz); int GetMatrixSide(MatrixSize sz); std::string GetVectorNotation(int reg, VectorSize size); diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 8935d59cea..e553354d61 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -2208,8 +2208,8 @@ void CosOnly(SinCosArg angle, float *output) { output[1] = vfpu_cos(angle); } -void ASinScaled(SinCosArg angle, float *output) { - output[0] = vfpu_asin(angle); +void ASinScaled(SinCosArg sine, float *output) { + output[0] = vfpu_asin(sine); } void SinCosNegSin(SinCosArg angle, float *output) { @@ -2217,13 +2217,25 @@ void SinCosNegSin(SinCosArg angle, float *output) { output[0] = -output[0]; } +void Exp2(SinCosArg arg, float *output) { + output[0] = vfpu_exp2(arg); +} + +void Log2(SinCosArg arg, float *output) { + output[0] = vfpu_log2(arg); +} + +void RExp2(SinCosArg arg, float *output) { + output[0] = vfpu_rexp2(arg); +} + void Jit::Comp_VV2Op(MIPSOpcode op) { CONDITIONAL_DISABLE(VFPU_VEC); if (js.HasUnknownPrefix()) DISABLE; - auto trigCallHelper = [this](void (*sinCosFunc)(SinCosArg, float *output), u8 sreg) { + auto specialFuncCallHelper = [this](void (*specialFunc)(SinCosArg, float *output), u8 sreg) { #if PPSSPP_ARCH(AMD64) MOVSS(XMM0, fpr.V(sreg)); // TODO: This reg might be different on Linux... @@ -2232,7 +2244,7 @@ void Jit::Comp_VV2Op(MIPSOpcode op) { #else LEA(64, RDI, MIPSSTATE_VAR(sincostemp[0])); #endif - ABI_CallFunction(thunks.ProtectFunction((const void *)sinCosFunc, 0)); + ABI_CallFunction(thunks.ProtectFunction((const void *)specialFunc, 0)); #else // Sigh, passing floats with cdecl isn't pretty, ends up on the stack. if (fpr.V(sreg).IsSimpleReg()) { @@ -2240,7 +2252,7 @@ void Jit::Comp_VV2Op(MIPSOpcode op) { } else { MOV(32, R(EAX), fpr.V(sreg)); } - CallProtectedFunction((const void *)sinCosFunc, R(EAX), Imm32((uint32_t)(uintptr_t)&mips_->sincostemp[0])); + CallProtectedFunction((const void *)specialFunc, R(EAX), Imm32((uint32_t)(uintptr_t)&mips_->sincostemp[0])); #endif }; @@ -2406,18 +2418,20 @@ void Jit::Comp_VV2Op(MIPSOpcode op) { DIVSS(tempxregs[i], R(XMM0)); break; case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin - trigCallHelper(&SinOnly, sregs[i]); + specialFuncCallHelper(&SinOnly, sregs[i]); MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0])); break; case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos - trigCallHelper(&CosOnly, sregs[i]); + specialFuncCallHelper(&CosOnly, sregs[i]); MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[1])); break; case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2 - DISABLE; + specialFuncCallHelper(&Exp2, sregs[i]); + MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0])); break; case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2 - DISABLE; + specialFuncCallHelper(&Log2, sregs[i]); + MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0])); break; case 22: // d[i] = sqrtf(s[i]); break; //vsqrt SQRTSS(tempxregs[i], fpr.V(sregs[i])); @@ -2425,7 +2439,7 @@ void Jit::Comp_VV2Op(MIPSOpcode op) { ANDPS(tempxregs[i], MatR(TEMPREG)); break; case 23: // d[i] = asinf(s[i]) / M_PI_2; break; //vasin - trigCallHelper(&ASinScaled, sregs[i]); + specialFuncCallHelper(&ASinScaled, sregs[i]); MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0])); break; case 24: // d[i] = -1.0f / s[i]; break; // vnrcp @@ -2436,11 +2450,12 @@ void Jit::Comp_VV2Op(MIPSOpcode op) { MOVSS(tempxregs[i], R(XMM0)); break; case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin - trigCallHelper(&NegSinOnly, sregs[i]); + specialFuncCallHelper(&NegSinOnly, sregs[i]); MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0])); break; case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2 - DISABLE; + specialFuncCallHelper(&RExp2, sregs[i]); + MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0])); break; } } diff --git a/Windows/MainWindow.cpp b/Windows/MainWindow.cpp index 03be273789..fbba1e23cf 100644 --- a/Windows/MainWindow.cpp +++ b/Windows/MainWindow.cpp @@ -87,6 +87,7 @@ #define MOUSEEVENTF_FROMTOUCH_NOPEN 0xFF515780 //http://msdn.microsoft.com/en-us/library/windows/desktop/ms703320(v=vs.85).aspx #define MOUSEEVENTF_MASK_PLUS_PENTOUCH 0xFFFFFF80 +// See https://github.com/unknownbrackets/verysleepy/commit/fc1b1b3bd6081fae3566cdb542d896e413238b71 int verysleepy__useSendMessage = 1; const UINT WM_VERYSLEEPY_MSG = WM_APP + 0x3117;