From 44286a2b37c4ac27b6a11d66c54c809908c0ebc6 Mon Sep 17 00:00:00 2001
From: Henrik Rydgard
Date: Tue, 24 Mar 2015 00:32:31 +0100
Subject: [PATCH] ARM64: Accurate float->int conversion with rounding mode.

---
 Core/HLE/sceKernel.cpp            |  3 +-
 Core/MIPS/ARM64/Arm64CompFPU.cpp  | 99 +++++++++++++++++++++++--------
 Core/MIPS/ARM64/Arm64CompVFPU.cpp |  9 +--
 Core/MIPS/ARM64/Arm64Jit.cpp      | 24 +++++++-
 android/jni/Arm64EmitterTest.cpp  |  6 ++
 5 files changed, 110 insertions(+), 31 deletions(-)

diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp
index 59e72ae5cb..17ee8f6db0 100644
--- a/Core/HLE/sceKernel.cpp
+++ b/Core/HLE/sceKernel.cpp
@@ -973,7 +973,7 @@ void Register_ExceptionManagerForKernel()
 
 // Seen in some homebrew
 const HLEFunction UtilsForKernel[] = {
-	{0XC2DF770E, nullptr, "sceKernelIcacheInvalidateRange", '?', "" },
+	{0XC2DF770E, WrapI_UI, "sceKernelIcacheInvalidateRange", '?', "" },
 	{0X78934841, nullptr, "sceKernelGzipDecompress", '?', "" },
 	{0XE8DB3CE6, nullptr, "sceKernelDeflateDecompress", '?', "" },
 	{0X840259F1, nullptr, "sceKernelUtilsSha1Digest", '?', "" },
@@ -1003,5 +1003,4 @@ void Register_UtilsForKernel()
 void Register_ThreadManForKernel()
 {
 	RegisterModule("ThreadManForKernel", ARRAY_SIZE(ThreadManForKernel), ThreadManForKernel);
-
 }
diff --git a/Core/MIPS/ARM64/Arm64CompFPU.cpp b/Core/MIPS/ARM64/Arm64CompFPU.cpp
index c0dae1687f..a78506b229 100644
--- a/Core/MIPS/ARM64/Arm64CompFPU.cpp
+++ b/Core/MIPS/ARM64/Arm64CompFPU.cpp
@@ -101,7 +101,7 @@ void Arm64Jit::Comp_FPULS(MIPSOpcode op)
 			SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
 			doCheck = true;
 		}
-		ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
+		MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
 	}
 	FixupBranch skip;
 	if (doCheck) {
@@ -134,7 +134,7 @@ void Arm64Jit::Comp_FPULS(MIPSOpcode op)
 			SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
 			doCheck = true;
 		}
-		ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
+		MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
 	}
 	FixupBranch skip2;
 	if (doCheck) {
@@ -200,7 +200,6 @@ void Arm64Jit::Comp_FPUComp(MIPSOpcode op) {
 
 void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {
 	CONDITIONAL_DISABLE;
-
 	int fs = _FS;
 	int fd = _FD;
 
@@ -223,28 +222,49 @@ void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {
 		break;
 
 	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
-		fpr.MapDirtyIn(fd, fs);
-		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_N);  // to nearest, ties to even
-		break;
-
-	case 13: //FsI(fd) = Rto0(F(fs))); break; //trunc.w.s
-		fpr.MapDirtyIn(fd, fs);
-		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
-		// TODO: Correctly convert NAN to 0x7fffffff
-		break;
-
-	case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
 	{
 		fpr.MapDirtyIn(fd, fs);
+		fp.FCMP(fpr.R(fs), fpr.R(fs));  // Detect NaN
+		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_N);  // to nearest, ties to even
+		FixupBranch skip = B(CC_VC);
+		MOVI2R(SCRATCH1, 0x7FFFFFFF);
+		fp.FMOV(fpr.R(fd), SCRATCH1);
+		SetJumpTarget(skip);
+		break;
+	}
+
+	case 13: //FsI(fd) = Rto0(F(fs))); break; //trunc.w.s
+	{
+		fpr.MapDirtyIn(fd, fs);
+		fp.FCMP(fpr.R(fs), fpr.R(fs));
+		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
+		FixupBranch skip = B(CC_VC);
+		MOVI2R(SCRATCH1, 0x7FFFFFFF);
+		fp.FMOV(fpr.R(fd), SCRATCH1);
+		SetJumpTarget(skip);
+		break;
+	}
+
+	case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
+	{
+		fpr.MapDirtyIn(fd, fs);
+		fp.FCMP(fpr.R(fs), fpr.R(fs));
 		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_P);  // towards +inf
-		// TODO: Correctly convert NAN to 0x7fffffff
+		FixupBranch skip = B(CC_VC);
+		MOVI2R(SCRATCH1, 0x7FFFFFFF);
+		fp.FMOV(fpr.R(fd), SCRATCH1);
+		SetJumpTarget(skip);
 		break;
 	}
 
 	case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
 	{
 		fpr.MapDirtyIn(fd, fs);
+		fp.FCMP(fpr.R(fs), fpr.R(fs));
 		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_M);  // towards -inf
-		// TODO: Correctly convert NAN to 0x7fffffff
+		FixupBranch skip = B(CC_VC);
+		MOVI2R(SCRATCH1, 0x7FFFFFFF);
+		fp.FMOV(fpr.R(fd), SCRATCH1);
+		SetJumpTarget(skip);
 		break;
 	}
@@ -254,10 +274,45 @@ void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {
 		break;
 
 	case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
-		// TODO: Find a way to use the current rounding mode. Until then, default to C conversion rules.
 		fpr.MapDirtyIn(fd, fs);
-		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
-		// TODO: Correctly convert NAN to 0x7fffffff
+		if (js.hasSetRounding) {
+			// Urgh, this looks awfully expensive and bloated.. Perhaps we should have a global function pointer to the right FCVTS to use,
+			// and update it when the rounding mode is switched.
+			fp.FCMP(fpr.R(fs), fpr.R(fs));
+			FixupBranch skip_nan = B(CC_VC);
+			MOVI2R(SCRATCH1, 0x7FFFFFFF);
+			fp.FMOV(fpr.R(fd), SCRATCH1);
+			FixupBranch skip_rest = B();
+			// MIPS Rounding Mode:
+			//   0: Round nearest
+			//   1: Round to zero
+			//   2: Round up (ceil)
+			//   3: Round down (floor)
+			SetJumpTarget(skip_nan);
+			LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, fcr31));
+			ANDI2R(SCRATCH1, SCRATCH1, 3);
+			ADR(SCRATCH2_64, 12);  // PC + 12 = address of first FCVTS below
+			ADD(SCRATCH2_64, SCRATCH2_64, EncodeRegTo64(SCRATCH1), ArithOption(SCRATCH2_64, ST_LSL, 3));
+			BR(SCRATCH2_64);  // choose from the four variants below!
+			fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_N);
+			FixupBranch skip1 = B();
+			fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
+			FixupBranch skip2 = B();
+			fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_P);
+			FixupBranch skip3 = B();
+			fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_M);
+			SetJumpTarget(skip1);
+			SetJumpTarget(skip2);
+			SetJumpTarget(skip3);
+			SetJumpTarget(skip_rest);
+		} else {
+			fp.FCMP(fpr.R(fs), fpr.R(fs));
+			fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
+			FixupBranch skip_nan = B(CC_VC);
+			MOVI2R(SCRATCH1, 0x7FFFFFFF);
+			fp.FMOV(fpr.R(fd), SCRATCH1);
+			SetJumpTarget(skip_nan);
+		}
 		break;
 
 	default:
@@ -282,14 +337,13 @@ void Arm64Jit::Comp_mxc1(MIPSOpcode op)
 		}
 		return;
 
-	/*
 	case 2: //cfc1
 		if (fs == 31) {
 			if (gpr.IsImm(MIPS_REG_FPCOND)) {
 				gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT);
 				LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));
 				if (gpr.GetImm(MIPS_REG_FPCOND) & 1) {
-					ORI2R(gpr.R(rt), gpr.R(rt), 0x1 << 23, SCRATCH2);
+					ORRI2R(gpr.R(rt), gpr.R(rt), 0x1 << 23, SCRATCH2);
 				} else {
 					ANDI2R(gpr.R(rt), gpr.R(rt), ~(0x1 << 23), SCRATCH2);
 				}
@@ -308,7 +362,6 @@ void Arm64Jit::Comp_mxc1(MIPSOpcode op)
 			gpr.SetImm(rt, 0);
 		}
 		return;
-	*/
 
 	case 4: //FI(fs) = R(rt); break; //mtc1
 		if (gpr.IsImm(rt)) {
@@ -334,7 +387,6 @@ void Arm64Jit::Comp_mxc1(MIPSOpcode op)
 		}
 		return;
 
-	/*
 	case 6: //ctc1
 		if (fs == 31) {
 			// Must clear before setting, since ApplyRoundingMode() assumes it was cleared.
@@ -361,7 +413,6 @@ void Arm64Jit::Comp_mxc1(MIPSOpcode op)
 			Comp_Generic(op);
 		}
 		return;
-	*/
 	default:
 		DISABLE;
 		break;
diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp
index 5e724e7482..70cfae1f66 100644
--- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp
+++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp
@@ -229,7 +229,8 @@ namespace MIPSComp
 				SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
 				doCheck = true;
 			}
-			ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
+			// Pointerify
+			MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
 		}
 		FixupBranch skip;
 		if (doCheck) {
@@ -264,7 +265,7 @@ namespace MIPSComp
 				SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
 				doCheck = true;
 			}
-			ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
+			MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
 		}
 		FixupBranch skip;
 		if (doCheck) {
@@ -311,7 +312,7 @@ namespace MIPSComp
 				SetCCAndSCRATCH1ForSafeAddress(rs, imm, SCRATCH2);
 				doCheck = true;
 			}
-			ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
+			MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
 		}
 
 		FixupBranch skip;
@@ -347,7 +348,7 @@ namespace MIPSComp
 				SetCCAndSCRATCH1ForSafeAddress(rs, imm, SCRATCH2);
 				doCheck = true;
 			}
-			ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
+			MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
 		}
 
 		FixupBranch skip;
diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp
index 1022cb8ff2..586bb624c0 100644
--- a/Core/MIPS/ARM64/Arm64Jit.cpp
+++ b/Core/MIPS/ARM64/Arm64Jit.cpp
@@ -516,7 +516,29 @@ void Arm64Jit::ApplyRoundingMode(bool force) {
 }
 
 void Arm64Jit::UpdateRoundingMode() {
-	// TODO ARM64
+	// NOTE: Must not destroy SCRATCH1.
+	if (g_Config.bSetRoundingMode) {
+		LDR(INDEX_UNSIGNED, SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));
+		if (!g_Config.bForceFlushToZero) {
+			TSTI2R(SCRATCH2, 1 << 24);
+			ANDI2R(SCRATCH2, SCRATCH2, 3);
+			FixupBranch skip = B(CC_EQ);
+			ADDI2R(SCRATCH2, SCRATCH2, 4);
+			SetJumpTarget(skip);
+			// We can only skip if the rounding mode is zero and flush is set.
+			CMPI2R(SCRATCH2, 4);
+		} else {
+			ANDSI2R(SCRATCH2, SCRATCH2, 3);
+		}
+
+		FixupBranch skip = B(CC_EQ);
+		PUSH(SCRATCH1_64);
+		MOVI2R(SCRATCH2, 1);
+		MOVP2R(SCRATCH1_64, &js.hasSetRounding);
+		STRB(INDEX_UNSIGNED, SCRATCH2, SCRATCH1_64, 0);
+		POP(SCRATCH1_64);
+		SetJumpTarget(skip);
+	}
 }
 
 // IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
diff --git a/android/jni/Arm64EmitterTest.cpp b/android/jni/Arm64EmitterTest.cpp
index 9c7180a378..7e15731e54 100644
--- a/android/jni/Arm64EmitterTest.cpp
+++ b/android/jni/Arm64EmitterTest.cpp
@@ -44,6 +44,12 @@ void TestCode::Generate()
 
 	ABI_PushRegisters(regs_to_save);
 
+	PUSH(X3);
+	POP(X3);
+
+	PUSH2(X3, X4);
+	POP2(X3, X4);
+
 	fp.SCVTF(S0, W3, 12);
 	fp.SCVTF(S3, W12);
 	MOVI2R(X0, 1337);
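
Reference note on the cvt.w.s block: the emitted code is meant to give a NaN input the value 0x7FFFFFFF (the MOVI2R/FMOV path taken when FCMP raises the unordered condition), and otherwise to convert using the MIPS rounding mode in fcr31 bits 1:0, dispatched onto one of the four FCVTS variants. A rough host-side sketch of that mapping follows; ConvertWS is a hypothetical helper, not part of the patch, and the explicit clamps only imitate the saturation that AArch64's FCVTS performs on out-of-range inputs.

#include <cmath>
#include <cstdint>

// Hypothetical reference helper, not part of the patch.
static int32_t ConvertWS(float f, uint32_t fcr31) {
	if (std::isnan(f))
		return 0x7FFFFFFF;  // same constant the JIT loads with MOVI2R
	double r;
	switch (fcr31 & 3) {
	case 0:  r = std::nearbyint(f); break;  // ROUND_N: to nearest, ties to even (host default mode)
	case 1:  r = std::trunc(f); break;      // ROUND_Z: toward zero
	case 2:  r = std::ceil(f); break;       // ROUND_P: toward +inf
	default: r = std::floor(f); break;      // ROUND_M: toward -inf
	}
	// FCVTS on AArch64 saturates out-of-range results; clamp to match.
	if (r >= 2147483648.0) return 0x7FFFFFFF;
	if (r < -2147483648.0) return INT32_MIN;
	return (int32_t)r;
}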
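The new UpdateRoundingMode() sets js.hasSetRounding whenever fcr31 asks for anything other than the one state that needs no special handling. As far as the emitted sequence can be read, the condition it computes is the following paraphrase; NeedsRoundingFixup is a made-up name, and the bit positions come from the code above (bits 1:0 are the rounding mode, bit 24 is flush-to-zero).

#include <cstdint>

// Hypothetical paraphrase of the emitted UpdateRoundingMode() check.
static bool NeedsRoundingFixup(uint32_t fcr31, bool forceFlushToZero) {
	uint32_t v = fcr31 & 3;        // MIPS rounding mode (bits 1:0)
	if (forceFlushToZero)
		return v != 0;             // ANDSI2R path: only the mode matters
	if (fcr31 & (1 << 24))         // game requested flush-to-zero
		v += 4;
	return v != 4;                 // skip only when the mode is zero and flush is set
}
// The JIT stores 1 into js.hasSetRounding whenever this would return true.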
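The repeated replacement of ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG) with MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32) ("Pointerify") folds the host base pointer into the 32-bit guest address without reading another register. MOVK with a 32-bit shift rewrites only bits 47:32 of the destination, so the result equals the old ADD only under two assumptions about how the 64-bit memory arena is mapped: Memory::base is 4 GB-aligned (low 32 bits zero) and lies below 2^48. A small sketch of that equivalence, with PointerifyViaMovk as an illustrative name only:

#include <cassert>
#include <cstdint>

// Illustration only; models what MOVK Xd, #(base >> 32), LSL #32 does to a
// register that already holds the guest address zero-extended to 64 bits.
static uint64_t PointerifyViaMovk(uint64_t memBase, uint32_t guestAddr) {
	assert((memBase & 0xFFFFFFFFu) == 0);      // assumed: base is 4 GB-aligned
	assert((memBase >> 48) == 0);              // assumed: immediate fits in MOVK's 16 bits
	uint64_t host = (uint64_t)guestAddr;       // bits 63:32 are zero here
	host |= ((memBase >> 32) & 0xFFFF) << 32;  // MOVK writes bits 47:32, keeps the rest
	assert(host == memBase + guestAddr);       // matches the old ADD against MEMBASEREG
	return host;
}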