mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
VFPU: Some micro-optimizations. Don't fall back to the interpreter path for vexp2/vlog2/vrexp2.
This commit is contained in:
parent
27b8d27efc
commit
9db9fec898
6 changed files with 65 additions and 59 deletions
|
@ -83,7 +83,7 @@ JitBlockCache::~JitBlockCache() {
|
|||
Shutdown();
|
||||
}
|
||||
|
||||
bool JitBlock::ContainsAddress(u32 em_address) {
|
||||
bool JitBlock::ContainsAddress(u32 em_address) const {
|
||||
// WARNING - THIS DOES NOT WORK WITH JIT INLINING ENABLED.
|
||||
// However, that doesn't exist yet so meh.
|
||||
return (em_address >= originalAddress && em_address < originalAddress + 4 * originalSize);
|
||||
|
|
|
@ -59,7 +59,7 @@ enum class DestroyType {
|
|||
// We should be careful not to access these block structures during runtime as they are large.
|
||||
// Fine to mess with them at block compile time though.
|
||||
struct JitBlock {
|
||||
bool ContainsAddress(u32 em_address);
|
||||
bool ContainsAddress(u32 em_address) const;
|
||||
|
||||
const u8 *checkedEntry; // const, we have to translate to writable.
|
||||
const u8 *normalEntry;
|
||||
|
|
|
@ -165,68 +165,58 @@ void GetMatrixRows(int matrixReg, MatrixSize msize, u8 vecs[4]) {
|
|||
}
|
||||
|
||||
void ReadVector(float *rd, VectorSize size, int reg) {
|
||||
int row = 0;
|
||||
int length = 0;
|
||||
|
||||
int row;
|
||||
int length;
|
||||
switch (size) {
|
||||
case V_Single: rd[0] = V(reg); return; // transpose = 0; row=(reg>>5)&3; length = 1; break;
|
||||
case V_Pair: row=(reg>>5)&2; length = 2; break;
|
||||
case V_Triple: row=(reg>>6)&1; length = 3; break;
|
||||
case V_Quad: row=(reg>>5)&2; length = 4; break;
|
||||
default: _assert_msg_(false, "%s: Bad vector size", __FUNCTION__);
|
||||
default: length = 0; break;
|
||||
}
|
||||
int transpose = (reg>>5) & 1;
|
||||
const int mtx = (reg >> 2) & 7;
|
||||
int transpose = (reg >> 5) & 1;
|
||||
const int mtx = reg & (7 << 2);
|
||||
const int col = reg & 3;
|
||||
|
||||
if (transpose) {
|
||||
const int base = mtx * 4 + col * 32;
|
||||
const int base = mtx + col * 32;
|
||||
for (int i = 0; i < length; i++)
|
||||
rd[i] = V(base + ((row+i)&3));
|
||||
} else {
|
||||
const int base = mtx * 4 + col;
|
||||
const int base = mtx + col;
|
||||
for (int i = 0; i < length; i++)
|
||||
rd[i] = V(base + ((row+i)&3)*32);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteVector(const float *rd, VectorSize size, int reg) {
|
||||
if (size == V_Single) {
|
||||
// Optimize the common case.
|
||||
if (!currentMIPS->VfpuWriteMask(0)) {
|
||||
V(reg) = rd[0];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const int mtx = (reg>>2)&7;
|
||||
const int col = reg & 3;
|
||||
int transpose = (reg>>5)&1;
|
||||
int row = 0;
|
||||
int length = 0;
|
||||
int row;
|
||||
int length;
|
||||
|
||||
switch (size) {
|
||||
case V_Single: _dbg_assert_(false); return; // transpose = 0; row=(reg>>5)&3; length = 1; break;
|
||||
case V_Single: if (!currentMIPS->VfpuWriteMask(0)) V(reg) = rd[0]; return; // transpose = 0; row=(reg>>5)&3; length = 1; break;
|
||||
case V_Pair: row=(reg>>5)&2; length = 2; break;
|
||||
case V_Triple: row=(reg>>6)&1; length = 3; break;
|
||||
case V_Quad: row=(reg>>5)&2; length = 4; break;
|
||||
default: _assert_msg_(false, "%s: Bad vector size", __FUNCTION__);
|
||||
default: length = 0; break;
|
||||
}
|
||||
|
||||
const int mtx = reg & (7 << 2);
|
||||
const int col = reg & 3;
|
||||
bool transpose = (reg >> 5) & 1;
|
||||
if (currentMIPS->VfpuWriteMask() == 0) {
|
||||
if (transpose) {
|
||||
const int base = mtx * 4 + col * 32;
|
||||
const int base = mtx + col * 32;
|
||||
for (int i = 0; i < length; i++)
|
||||
V(base + ((row+i)&3)) = rd[i];
|
||||
} else {
|
||||
const int base = mtx * 4 + col;
|
||||
const int base = mtx + col;
|
||||
for (int i = 0; i < length; i++)
|
||||
V(base + ((row+i)&3)*32) = rd[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (!currentMIPS->VfpuWriteMask(i)) {
|
||||
int index = mtx * 4;
|
||||
int index = mtx;
|
||||
if (transpose)
|
||||
index += ((row+i)&3) + col*32;
|
||||
else
|
||||
|
@ -243,9 +233,6 @@ u32 VFPURewritePrefix(int ctrl, u32 remove, u32 add) {
|
|||
}
|
||||
|
||||
void ReadMatrix(float *rd, MatrixSize size, int reg) {
|
||||
int mtx = (reg >> 2) & 7;
|
||||
int col = reg & 3;
|
||||
|
||||
int row = 0;
|
||||
int side = 0;
|
||||
int transpose = (reg >> 5) & 1;
|
||||
|
@ -255,9 +242,12 @@ void ReadMatrix(float *rd, MatrixSize size, int reg) {
|
|||
case M_2x2: row = (reg >> 5) & 2; side = 2; break;
|
||||
case M_3x3: row = (reg >> 6) & 1; side = 3; break;
|
||||
case M_4x4: row = (reg >> 5) & 2; side = 4; break;
|
||||
default: _assert_msg_(false, "%s: Bad matrix size", __FUNCTION__);
|
||||
default: side = 0; break;
|
||||
}
|
||||
|
||||
int mtx = (reg >> 2) & 7;
|
||||
int col = reg & 3;
|
||||
|
||||
// The voffset ordering is now integrated in these formulas,
|
||||
// eliminating a table lookup.
|
||||
const float *v = currentMIPS->v + (size_t)mtx * 16;
|
||||
|
@ -296,8 +286,8 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) {
|
|||
int mtx = (reg>>2)&7;
|
||||
int col = reg&3;
|
||||
|
||||
int row = 0;
|
||||
int side = 0;
|
||||
int row;
|
||||
int side;
|
||||
int transpose = (reg >> 5) & 1;
|
||||
|
||||
switch (size) {
|
||||
|
@ -305,7 +295,7 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) {
|
|||
case M_2x2: row = (reg >> 5) & 2; side = 2; break;
|
||||
case M_3x3: row = (reg >> 6) & 1; side = 3; break;
|
||||
case M_4x4: row = (reg >> 5) & 2; side = 4; break;
|
||||
default: _assert_msg_(false, "%s: Bad matrix size", __FUNCTION__);
|
||||
default: side = 0;
|
||||
}
|
||||
|
||||
if (currentMIPS->VfpuWriteMask() != 0) {
|
||||
|
@ -370,16 +360,6 @@ int GetVectorOverlap(int vec1, VectorSize size1, int vec2, VectorSize size2) {
|
|||
return count;
|
||||
}
|
||||
|
||||
int GetNumVectorElements(VectorSize sz) {
|
||||
switch (sz) {
|
||||
case V_Single: return 1;
|
||||
case V_Pair: return 2;
|
||||
case V_Triple: return 3;
|
||||
case V_Quad: return 4;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
VectorSize GetHalfVectorSizeSafe(VectorSize sz) {
|
||||
switch (sz) {
|
||||
case V_Pair: return V_Single;
|
||||
|
|
|
@ -218,7 +218,17 @@ VectorSize GetDoubleVectorSizeSafe(VectorSize sz);
|
|||
VectorSize GetDoubleVectorSize(VectorSize sz);
|
||||
VectorSize MatrixVectorSizeSafe(MatrixSize sz);
|
||||
VectorSize MatrixVectorSize(MatrixSize sz);
|
||||
int GetNumVectorElements(VectorSize sz);
|
||||
|
||||
// Returns the number of elements in a vector of size `sz`:
// 1 for V_Single, 2 for V_Pair, 3 for V_Triple, 4 for V_Quad.
// Any other value yields 0 rather than asserting.
inline int GetNumVectorElements(VectorSize sz) {
|
||||
switch (sz) {
|
||||
case V_Single: return 1;
|
||||
case V_Pair: return 2;
|
||||
case V_Triple: return 3;
|
||||
case V_Quad: return 4;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int GetMatrixSideSafe(MatrixSize sz);
|
||||
int GetMatrixSide(MatrixSize sz);
|
||||
std::string GetVectorNotation(int reg, VectorSize size);
|
||||
|
|
|
@ -2208,8 +2208,8 @@ void CosOnly(SinCosArg angle, float *output) {
|
|||
output[1] = vfpu_cos(angle);
|
||||
}
|
||||
|
||||
void ASinScaled(SinCosArg angle, float *output) {
|
||||
output[0] = vfpu_asin(angle);
|
||||
void ASinScaled(SinCosArg sine, float *output) {
|
||||
output[0] = vfpu_asin(sine);
|
||||
}
|
||||
|
||||
void SinCosNegSin(SinCosArg angle, float *output) {
|
||||
|
@ -2217,13 +2217,25 @@ void SinCosNegSin(SinCosArg angle, float *output) {
|
|||
output[0] = -output[0];
|
||||
}
|
||||
|
||||
// Helper called from JIT-generated code: stores vfpu_exp2(arg) in output[0].
// Jit::Comp_VV2Op passes this to specialFuncCallHelper for vexp2 (case 20)
// and then loads the result back from sincostemp[0].
void Exp2(SinCosArg arg, float *output) {
|
||||
output[0] = vfpu_exp2(arg);
|
||||
}
|
||||
|
||||
// Helper called from JIT-generated code: stores vfpu_log2(arg) in output[0].
// Jit::Comp_VV2Op passes this to specialFuncCallHelper for vlog2 (case 21)
// and then loads the result back from sincostemp[0].
void Log2(SinCosArg arg, float *output) {
|
||||
output[0] = vfpu_log2(arg);
|
||||
}
|
||||
|
||||
// Helper called from JIT-generated code: stores vfpu_rexp2(arg) in output[0].
// Jit::Comp_VV2Op passes this to specialFuncCallHelper for vrexp2 (case 28)
// and then loads the result back from sincostemp[0].
void RExp2(SinCosArg arg, float *output) {
|
||||
output[0] = vfpu_rexp2(arg);
|
||||
}
|
||||
|
||||
void Jit::Comp_VV2Op(MIPSOpcode op) {
|
||||
CONDITIONAL_DISABLE(VFPU_VEC);
|
||||
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
auto trigCallHelper = [this](void (*sinCosFunc)(SinCosArg, float *output), u8 sreg) {
|
||||
auto specialFuncCallHelper = [this](void (*specialFunc)(SinCosArg, float *output), u8 sreg) {
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
MOVSS(XMM0, fpr.V(sreg));
|
||||
// TODO: This reg might be different on Linux...
|
||||
|
@ -2232,7 +2244,7 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
|||
#else
|
||||
LEA(64, RDI, MIPSSTATE_VAR(sincostemp[0]));
|
||||
#endif
|
||||
ABI_CallFunction(thunks.ProtectFunction((const void *)sinCosFunc, 0));
|
||||
ABI_CallFunction(thunks.ProtectFunction((const void *)specialFunc, 0));
|
||||
#else
|
||||
// Sigh, passing floats with cdecl isn't pretty, ends up on the stack.
|
||||
if (fpr.V(sreg).IsSimpleReg()) {
|
||||
|
@ -2240,7 +2252,7 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
|||
} else {
|
||||
MOV(32, R(EAX), fpr.V(sreg));
|
||||
}
|
||||
CallProtectedFunction((const void *)sinCosFunc, R(EAX), Imm32((uint32_t)(uintptr_t)&mips_->sincostemp[0]));
|
||||
CallProtectedFunction((const void *)specialFunc, R(EAX), Imm32((uint32_t)(uintptr_t)&mips_->sincostemp[0]));
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -2406,18 +2418,20 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
|||
DIVSS(tempxregs[i], R(XMM0));
|
||||
break;
|
||||
case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
|
||||
trigCallHelper(&SinOnly, sregs[i]);
|
||||
specialFuncCallHelper(&SinOnly, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
|
||||
break;
|
||||
case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
|
||||
trigCallHelper(&CosOnly, sregs[i]);
|
||||
specialFuncCallHelper(&CosOnly, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[1]));
|
||||
break;
|
||||
case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
|
||||
DISABLE;
|
||||
specialFuncCallHelper(&Exp2, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
|
||||
break;
|
||||
case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
|
||||
DISABLE;
|
||||
specialFuncCallHelper(&Log2, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
|
||||
break;
|
||||
case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
|
||||
SQRTSS(tempxregs[i], fpr.V(sregs[i]));
|
||||
|
@ -2425,7 +2439,7 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
|||
ANDPS(tempxregs[i], MatR(TEMPREG));
|
||||
break;
|
||||
case 23: // d[i] = asinf(s[i]) / M_PI_2; break; //vasin
|
||||
trigCallHelper(&ASinScaled, sregs[i]);
|
||||
specialFuncCallHelper(&ASinScaled, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
|
||||
break;
|
||||
case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
|
||||
|
@ -2436,11 +2450,12 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
|||
MOVSS(tempxregs[i], R(XMM0));
|
||||
break;
|
||||
case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
|
||||
trigCallHelper(&NegSinOnly, sregs[i]);
|
||||
specialFuncCallHelper(&NegSinOnly, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
|
||||
break;
|
||||
case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
|
||||
DISABLE;
|
||||
specialFuncCallHelper(&RExp2, sregs[i]);
|
||||
MOVSS(tempxregs[i], MIPSSTATE_VAR(sincostemp[0]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,6 +87,7 @@
|
|||
#define MOUSEEVENTF_FROMTOUCH_NOPEN 0xFF515780 //http://msdn.microsoft.com/en-us/library/windows/desktop/ms703320(v=vs.85).aspx
|
||||
#define MOUSEEVENTF_MASK_PLUS_PENTOUCH 0xFFFFFF80
|
||||
|
||||
// See https://github.com/unknownbrackets/verysleepy/commit/fc1b1b3bd6081fae3566cdb542d896e413238b71
|
||||
int verysleepy__useSendMessage = 1;
|
||||
|
||||
const UINT WM_VERYSLEEPY_MSG = WM_APP + 0x3117;
|
||||
|
|
Loading…
Add table
Reference in a new issue