From 820a8e8f2b827a2bdbd283536ebbded9541989c5 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 16:30:13 -0700 Subject: [PATCH 1/4] armjit: Don't reset downcount on fpu instructions. It's maintained always, oops. --- Core/MIPS/ARM/ArmCompFPU.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index f1efb1d4ee..7212a697f3 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -50,7 +50,6 @@ namespace MIPSComp void Jit::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; - SetRoundingMode(); int ft = _FT; int fs = _FS; @@ -191,8 +190,6 @@ void Jit::Comp_FPULS(MIPSOpcode op) void Jit::Comp_FPUComp(MIPSOpcode op) { CONDITIONAL_DISABLE; - // TODO: Does this matter here? - SetRoundingMode(); int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias @@ -338,7 +335,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s fpr.MapDirtyIn(fd, fs); - SetRoundingMode(); VCMP(fpr.R(fs), fpr.R(fs)); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). From 8daff0a25e23df14f88ffb1958d769f430b331bf Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 16:43:13 -0700 Subject: [PATCH 2/4] armjit: Fix some downcount issues with rounding. When setting the rounding mode we need to be super careful about not destroying flags or R0 if they are needed. 
--- Core/MIPS/ARM/ArmCompBranch.cpp | 2 +- Core/MIPS/ARM/ArmJit.cpp | 31 +++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index 2bdcc151dd..e58c54e130 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -558,8 +558,8 @@ void Jit::Comp_Syscall(MIPSOpcode op) gpr.SetRegImm(R0, op.encoding); QuickCallFunction(R1, (void *)&CallSyscall); } - RestoreDowncount(); SetRoundingMode(); + RestoreDowncount(); WriteSyscallExit(); js.compiling = false; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 7a7a186e82..ee2fe436a3 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -381,12 +381,14 @@ bool Jit::ReplaceJalTo(u32 dest) { gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); + ClearRoundingMode(); if (BLInRange((const void *)(entry->replaceFunc))) { BL((const void *)(entry->replaceFunc)); } else { MOVI2R(R0, (u32)entry->replaceFunc); BL(R0); } + SetRoundingMode(); WriteDownCountR(R0); } @@ -433,6 +435,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) } } else if (entry->replaceFunc) { FlushAll(); + ClearRoundingMode(); gpr.SetRegImm(SCRATCHREG1, js.compilerPC); MovToPC(SCRATCHREG1); @@ -447,8 +450,10 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { // Compile the original instruction at this address. We ignore cycles for hooks. + SetRoundingMode(); MIPSCompileOp(Memory::Read_Instruction(js.compilerPC, true)); } else { + SetRoundingMode(); LDR(R1, CTXREG, MIPS_REG_RA * 4); WriteDownCountR(R0); WriteExitDestInR(R1); @@ -550,31 +555,37 @@ void Jit::ClearRoundingMode() { VMRS(SCRATCHREG2); // Assume we're always in round-to-nearest mode beforehand. 
- BIC(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); - VMSR(SCRATCHREG1); + BIC(SCRATCHREG2, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); + VMSR(SCRATCHREG2); } } void Jit::SetRoundingMode() { + // NOTE: Must not destroy R0. if (g_Config.bSetRoundingMode) { - LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); - AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); + LDR(SCRATCHREG2, CTXREG, offsetof(MIPSState, fcr31)); + AND(SCRATCHREG2, SCRATCHREG2, Operand2(3)); // MIPS Rounding Mode: ARM Rounding Mode // 0: Round nearest 0 // 1: Round to zero 3 // 2: Round up (ceil) 1 // 3: Round down (floor) 2 - CMP(SCRATCHREG1, Operand2(1)); - SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); - SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); + CMP(SCRATCHREG2, Operand2(1)); + FixupBranch skip = B_CC(CC_LT); + SetCC(CC_EQ); ADD(SCRATCHREG2, SCRATCHREG2, Operand2(2)); + SetCC(CC_GT); SUB(SCRATCHREG2, SCRATCHREG2, Operand2(1)); SetCC(CC_AL); - VMRS(SCRATCHREG2); + PUSH(1, SCRATCHREG1); + VMRS(SCRATCHREG1); // Assume we're always in round-to-nearest mode beforehand. - ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); - VMSR(SCRATCHREG1); + ORR(SCRATCHREG2, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); + VMSR(SCRATCHREG2); + POP(1, SCRATCHREG1); + + SetJumpTarget(skip); } } From e8cdbcc33f341655d00379fe85e21e1a5e1a372c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 16:46:43 -0700 Subject: [PATCH 3/4] x86jit: Fix some flags/EAX trashing in rounding. Fixes #6810. 
--- Core/MIPS/x86/Jit.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 4f5cf98204..f6a968028e 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -487,8 +487,8 @@ bool Jit::ReplaceJalTo(u32 dest) { MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); ClearRoundingMode(); ABI_CallFunction(entry->replaceFunc); - SetRoundingMode(); SUB(32, M(&currentMIPS->downcount), R(EAX)); + SetRoundingMode(); } js.compilerPC += 4; @@ -539,14 +539,16 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); ClearRoundingMode(); ABI_CallFunction(entry->replaceFunc); - SetRoundingMode(); if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { // Compile the original instruction at this address. We ignore cycles for hooks. + SetRoundingMode(); MIPSCompileOp(Memory::Read_Instruction(js.compilerPC, true)); } else { MOV(32, R(ECX), M(&currentMIPS->r[MIPS_REG_RA])); SUB(32, M(&currentMIPS->downcount), R(EAX)); + SetRoundingMode(); + SUB(32, M(&currentMIPS->downcount), Imm8(0)); WriteExitDestInReg(ECX); js.compiling = false; } @@ -694,17 +696,18 @@ bool Jit::CheckJitBreakpoint(u32 addr, int downcountOffset) MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); ClearRoundingMode(); ABI_CallFunction(&JitBreakpoint); - SetRoundingMode(); // If 0, the conditional breakpoint wasn't taken. CMP(32, R(EAX), Imm32(0)); FixupBranch skip = J_CC(CC_Z); WriteDowncount(downcountOffset); // Just to fix the stack. + SetRoundingMode(); LOAD_FLAGS; JMP(asm_.dispatcherCheckCoreState, true); SetJumpTarget(skip); + SetRoundingMode(); LOAD_FLAGS; return true; From 5f6f6827b576f9fc60007ef1ed58e2bcf5016672 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 23:48:27 -0700 Subject: [PATCH 4/4] jit: Update rounding mode immediately on ctc1. 
--- Core/MIPS/ARM/ArmCompFPU.cpp | 24 ++++++++---------------- Core/MIPS/x86/CompFPU.cpp | 7 +++++++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 7212a697f3..4fe72292f8 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -406,23 +406,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) } else { gpr.MapDirtyIn(MIPS_REG_FPCOND, rt); } - // Hardware rounding method. - // Left here in case it is faster than conditional method. - /* - AND(SCRATCHREG1, gpr.R(rt), Operand2(3)); - // MIPS Rounding Mode <-> ARM Rounding Mode - // 0, 1, 2, 3 <-> 0, 3, 1, 2 - CMP(SCRATCHREG1, Operand2(1)); - SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); - SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); - SetCC(CC_AL); - // Load and Store RM to FPSCR - VMRS(SCRATCHREG2); - BIC(SCRATCHREG2, SCRATCHREG2, Operand2(0x3 << 22)); - ORR(SCRATCHREG2, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); - VMSR(SCRATCHREG2); - */ // Update MIPS state // TODO: Technically, should mask by 0x0181FFFF. Maybe just put all of FCR31 in the reg? STR(gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31)); @@ -433,6 +417,14 @@ void Jit::Comp_mxc1(MIPSOpcode op) MOV(SCRATCHREG1, Operand2(gpr.R(rt), ST_LSR, 23)); AND(gpr.R(MIPS_REG_FPCOND), SCRATCHREG1, Operand2(1)); #endif + SetRoundingMode(); + } else { + if ((gpr.GetImm(rt) & 3) == 0) { + // Default nearest mode, let's do this the fast way. 
+ ClearRoundingMode(); + } else { + SetRoundingMode(); + } } } else { Comp_Generic(op); diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index 48fdca49b9..a4a779f904 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -383,6 +383,12 @@ void Jit::Comp_mxc1(MIPSOpcode op) if (gpr.IsImm(rt)) { gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1); MOV(32, M(&mips_->fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF)); + if ((gpr.GetImm(rt) & 3) == 0) { + // Default nearest mode, let's do this the fast way. + ClearRoundingMode(); + } else { + SetRoundingMode(); + } } else { gpr.Lock(rt, MIPS_REG_FPCOND); gpr.MapReg(rt, true, false); @@ -393,6 +399,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) MOV(32, M(&mips_->fcr31), gpr.R(rt)); AND(32, M(&mips_->fcr31), Imm32(0x0181FFFF)); gpr.UnlockAll(); + SetRoundingMode(); } } else { Comp_Generic(op);