Merge pull request #15957 from unknownbrackets/branch-delayslot

Handle branch/jump in branch delay slots more accurately
This commit is contained in:
Henrik Rydgård 2022-09-04 23:18:38 +02:00 committed by GitHub
commit 2145a39251
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 363 additions and 164 deletions

View file

@ -55,7 +55,7 @@
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;
using namespace MIPSAnalyst;
@ -76,9 +76,13 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
@ -112,11 +116,9 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken || !likely)
@ -124,10 +126,10 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
// We might be able to flip the condition (EQ/NEQ are easy.)
@ -156,7 +158,7 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
ArmGen::FixupBranch ptr;
if (!likely) {
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -164,7 +166,18 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
} else {
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
@ -172,7 +185,7 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(notTakenTarget, js.nextExit++);
}
js.compiling = false;
@ -189,9 +202,13 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
@ -231,11 +248,9 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (andLink)
@ -245,10 +260,10 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
@ -260,7 +275,7 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -270,7 +285,18 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
@ -278,7 +304,7 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(notTakenTarget, js.nextExit++);
}
js.compiling = false;
}
@ -335,11 +361,12 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(MIPS_REG_FPCOND);
@ -348,7 +375,7 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -358,7 +385,18 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
@ -366,7 +404,7 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
js.compiling = false;
}
@ -394,19 +432,16 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC());
int imm3 = (op >> 18) & 7;
@ -417,7 +452,7 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
js.inDelaySlot = true;
if (!likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -427,18 +462,27 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
{
FlushAll();
ptr = B_CC(cc);
if (!delaySlotIsBranch)
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
js.inDelaySlot = false;
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, js.nextExit++);
WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
js.compiling = false;
}

View file

@ -55,7 +55,7 @@
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;
using namespace MIPSAnalyst;
@ -76,9 +76,13 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
@ -112,11 +116,9 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken || !likely)
@ -124,10 +126,10 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
// We might be able to flip the condition (EQ/NEQ are easy.)
@ -136,7 +138,7 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
const bool rtIsZero = gpr.IsImm(rt) && gpr.GetImm(rt) == 0;
Arm64Gen::FixupBranch ptr;
if ((likely || delaySlotIsNice) && (rsIsZero || rtIsZero) && canFlip) {
if ((likely || branchInfo.delaySlotIsNice) && (rsIsZero || rtIsZero) && canFlip) {
// Special case, we can just use CBZ/CBNZ directly.
MIPSGPReg r = rsIsZero ? rt : rs;
gpr.MapReg(r);
@ -169,7 +171,7 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
}
if (!likely) {
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -180,17 +182,27 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
}
}
if (likely) {
if (likely && !branchInfo.delaySlotIsBranch) {
// Only executed when taking the branch.
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(notTakenTarget, js.nextExit++);
}
js.compiling = false;
@ -207,9 +219,13 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
@ -249,11 +265,9 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (andLink)
@ -263,10 +277,10 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
@ -278,7 +292,7 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
Arm64Gen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -288,7 +302,18 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
{
FlushAll();
ptr = B(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
@ -296,7 +321,7 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(notTakenTarget, js.nextExit++);
}
js.compiling = false;
}
@ -352,16 +377,17 @@ void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) {
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(MIPS_REG_FPCOND);
Arm64Gen::FixupBranch ptr;
if (likely || delaySlotIsNice) {
if (likely || branchInfo.delaySlotIsNice) {
// FlushAll() won't actually change the reg.
ARM64Reg ar = gpr.R(MIPS_REG_FPCOND);
FlushAll();
@ -372,20 +398,31 @@ void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) {
}
} else {
TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, SCRATCH1);
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = B(cc);
}
if (likely) {
if (likely && !branchInfo.delaySlotIsBranch) {
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
js.compiling = false;
}
@ -410,25 +447,22 @@ void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) {
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC());
int imm3 = (op >> 18) & 7;
gpr.MapReg(MIPS_REG_VFPUCC);
Arm64Gen::FixupBranch ptr;
if (likely || delaySlotIsNice || delaySlotIsBranch) {
if (likely || branchInfo.delaySlotIsNice || branchInfo.delaySlotIsBranch) {
// FlushAll() won't actually change the reg.
ARM64Reg ar = gpr.R(MIPS_REG_VFPUCC);
FlushAll();
@ -439,21 +473,31 @@ void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) {
}
} else {
TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1ULL << imm3, SCRATCH1);
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = B(cc);
}
if (likely && !delaySlotIsBranch) {
if (likely && !branchInfo.delaySlotIsBranch) {
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, js.nextExit++);
WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
js.compiling = false;
}

View file

@ -64,15 +64,16 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
// Often, div/divu are followed by a likely "break" if the divisor was zero.
// Stalling is not really useful for us, so we optimize this out.
if (likely && offset == 4 && MIPS_IS_BREAK(delaySlotOp)) {
if (likely && offset == 4 && MIPS_IS_BREAK(branchInfo.delaySlotOp)) {
// Okay, let's not actually branch at all. We're done here.
EatInstruction(delaySlotOp);
EatInstruction(branchInfo.delaySlotOp);
// Let's not double-count the downcount, though.
js.downcountAmount--;
return;
@ -80,7 +81,7 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
MIPSGPReg lhs = rs;
MIPSGPReg rhs = rt;
if (!delaySlotIsNice && !likely) { // if likely, we don't need this
if (!branchInfo.delaySlotIsNice && !likely) { // if likely, we don't need this
if (rs != 0) {
ir.Write(IROp::Mov, IRTEMP_LHS, rs);
lhs = (MIPSGPReg)IRTEMP_LHS;
@ -91,7 +92,7 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
}
}
if (!likely)
if (!likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
int dcAmount = js.downcountAmount;
@ -99,10 +100,18 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
js.downcountAmount = 0;
FlushAll();
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs);
ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), lhs, rhs);
// This makes the block "impure" :(
if (likely)
if (likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
}
FlushAll();
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
@ -121,19 +130,20 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink,
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
MIPSGPReg lhs = rs;
if (!delaySlotIsNice) { // if likely, we don't need this
if (!branchInfo.delaySlotIsNice) { // if likely, we don't need this
ir.Write(IROp::Mov, IRTEMP_LHS, rs);
lhs = (MIPSGPReg)IRTEMP_LHS;
}
if (andLink)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8);
if (!likely)
if (!likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
int dcAmount = js.downcountAmount;
@ -141,9 +151,18 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink,
js.downcountAmount = 0;
FlushAll();
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs);
if (likely)
ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), lhs);
if (likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
}
// Taken
FlushAll();
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
@ -199,8 +218,10 @@ void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) {
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
ir.Write(IROp::FpCondToReg, IRTEMP_LHS);
if (!likely)
if (!likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
int dcAmount = js.downcountAmount;
@ -209,10 +230,19 @@ void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) {
FlushAll();
// Not taken
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_LHS, 0);
ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), IRTEMP_LHS, 0);
// Taken
if (likely)
if (likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
}
FlushAll();
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
@ -242,34 +272,37 @@ void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) {
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
if (!likely)
if (!likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
int dcAmount = js.downcountAmount;
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
js.downcountAmount = 0;
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC());
int imm3 = (op >> 18) & 7;
u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3));
FlushAll();
ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0);
ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), IRTEMP_LHS, 0);
if (likely)
if (likely && !branchInfo.delaySlotIsBranch)
CompileDelaySlot();
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
}
// Taken
FlushAll();

View file

@ -22,6 +22,7 @@
#include "ext/disarm.h"
#include "ext/udis86/udis86.h"
#include "Common/LogReporting.h"
#include "Common/StringUtils.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
@ -29,9 +30,11 @@
#include "Core/Util/DisArm64.h"
#include "Core/Config.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSTables.h"
#if PPSSPP_ARCH(ARM)
#include "../ARM/ArmJit.h"
@ -67,6 +70,32 @@ namespace MIPSComp {
}
}
// Records a branch at pc together with the instruction in its delay slot, and
// precomputes the delay slot's info flags plus whether it is itself a jump/branch.
// Note the argument order: andLink (al) is passed before likely (l).
BranchInfo::BranchInfo(u32 pc, MIPSOpcode o, MIPSOpcode delayO, bool al, bool l)
	: compilerPC(pc),
	  op(o),
	  delaySlotOp(delayO),
	  likely(l),
	  andLink(al) {
	// Look the flags up once and derive the "is a branch" bit from the same value.
	const u64 slotFlags = MIPSGetInfo(delayO).value;
	delaySlotInfo = slotFlags;
	delaySlotIsBranch = (slotFlags & (IS_JUMP | IS_CONDBRANCH)) != 0;
}
// Works out the address to exit to when the branch described by branchInfo is NOT taken.
// Usually this is compilerPC + 8 (just past the delay slot), but when the delay slot
// holds a jump or branch the exit may need to land on the delay slot itself (compilerPC + 4).
u32 ResolveNotTakenTarget(const BranchInfo &branchInfo) {
	const u32 pastDelaySlot = branchInfo.compilerPC + 8;

	const bool slotIsJump = (branchInfo.delaySlotInfo & IS_JUMP) != 0;
	const bool slotIsCondBranch = (branchInfo.delaySlotInfo & IS_CONDBRANCH) != 0;
	// An ordinary delay slot needs no special handling.
	if (!slotIsJump && !slotIsCondBranch)
		return pastDelaySlot;

	// A j/jr/jal/jalr in the delay slot is run when the outer branch is not taken.
	// A branch in the delay slot is skipped by a likely branch, so only non-likely falls onto it.
	// TODO: Technically, in the likely case, we should somehow suppress andLink on this exit.
	const bool resumeAtDelaySlot = slotIsJump || !branchInfo.likely;

	if (!slotIsJump) {
		// For a branch (not a jump), we really ought to try the delay slot and maybe take its target.
		// Like the VFPU case this hasn't been seen in practice, so only report mismatched targets.
		if (SignExtend16ToU32(branchInfo.delaySlotOp) != SignExtend16ToU32(branchInfo.op) - 1) {
			ERROR_LOG_REPORT(JIT, "Branch in branch delay slot at %08x with different target", branchInfo.compilerPC);
		}
	} else if (branchInfo.likely && (branchInfo.delaySlotInfo & (OUT_RA | OUT_RD)) != 0) {
		// Linking jump under a likely branch: the link isn't suppressed (see TODO), so report it.
		ERROR_LOG_REPORT(JIT, "Jump in likely branch delay slot with link at %08x", branchInfo.compilerPC);
	}

	return resumeAtDelaySlot ? pastDelaySlot - 4 : pastDelaySlot;
}
JitInterface *CreateNativeJit(MIPSState *mipsState) {
#if PPSSPP_ARCH(ARM)
return new MIPSComp::ArmJit(mipsState);

View file

@ -153,6 +153,23 @@ namespace MIPSComp {
typedef void (MIPSFrontendInterface::*MIPSCompileFunc)(MIPSOpcode opcode);
typedef int (MIPSFrontendInterface::*MIPSReplaceFunc)();
// Bundles the facts the branch compilers need about one branch instruction and
// the instruction sitting in its delay slot.
struct BranchInfo {
// Note the parameter order: andLink comes before likely, the reverse of the member order below.
BranchInfo(u32 pc, MIPSOpcode op, MIPSOpcode delaySlotOp, bool andLink, bool likely);
// PC of the branch being compiled (callers pass GetCompilerPC()).
u32 compilerPC;
// The branch instruction itself.
MIPSOpcode op;
// The instruction in the branch's delay slot (callers pass GetOffsetInstruction(1)).
MIPSOpcode delaySlotOp;
// Info flags for delaySlotOp (from MIPSGetInfo); tested for IS_JUMP, IS_CONDBRANCH, OUT_RA, OUT_RD.
u64 delaySlotInfo;
// Whether this is a "likely" branch.
bool likely;
// Whether the branch links (callers then set RA to compilerPC + 8).
bool andLink;
// Update manually if it's not always nice (rs/rt, rs/zero, etc.)
bool delaySlotIsNice = true;
// True when the delay slot instruction is itself a jump or conditional branch.
bool delaySlotIsBranch;
};
// Returns the address to continue at when the branch described by branchInfo is not taken.
// Normally this is compilerPC + 8.  If the delay slot holds a jump, or holds a conditional
// branch and the outer branch is not "likely", it is compilerPC + 4 (the delay slot itself).
// May emit an error report for combinations that are not handled precisely.
// This seems to be the same for all branch types.
u32 ResolveNotTakenTarget(const BranchInfo &branchInfo);
extern JitInterface *jit;
extern std::recursive_mutex jitLock;

View file

@ -63,7 +63,7 @@ using namespace MIPSAnalyst;
#define DO_CONDITIONAL_LOG 0
// We can also disable nice delay slots.
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;
#if DO_CONDITIONAL_LOG
@ -197,22 +197,22 @@ bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
return targetAddr > GetCompilerPC();
}
void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
if (andLink)
void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo) {
if (branchInfo.andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
if (CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr))
bool predictTakeBranch = PredictTakeBranch(targetAddr, branchInfo.likely);
if (!branchInfo.delaySlotIsBranch && CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr))
{
if (predictTakeBranch)
cc = FlipCCFlag(cc);
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
if (!branchInfo.likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE);
ptr = J_CC(cc, true);
GetStateAndFlushAll(state);
@ -243,7 +243,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
CONDITIONAL_LOG_EXIT(targetAddr);
// Don't forget to run the delay slot if likely.
if (likely)
if (branchInfo.likely)
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
@ -272,9 +272,9 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
else
{
Gen::FixupBranch ptr;
if (!likely)
if (!branchInfo.likely)
{
if (!delaySlotIsNice)
if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
@ -284,7 +284,19 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
{
FlushAll();
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Handle the linkage of a delay slot, even when we're taking the branch.
if (branchInfo.delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
FlushAll();
}
// Take the branch
@ -299,14 +311,25 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
}
}
void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo) {
// Continuing is handled in the imm branch case... TODO: move it here?
if (andLink)
if (branchInfo.andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (taken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
if (branchInfo.delaySlotIsBranch) {
if (taken) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
}
FlushAll();
} else if (taken || !branchInfo.likely) {
CompileDelaySlot(DELAYSLOT_FLUSH);
} else {
FlushAll();
}
const u32 destAddr = taken ? targetAddr : notTakenAddr;
CONDITIONAL_LOG_EXIT(destAddr);
@ -326,9 +349,13 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
@ -364,16 +391,14 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch)
CompBranchExit(immBranchTaken, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, false);
CompBranchExit(immBranchTaken, targetAddr, notTakenTarget, branchInfo);
else
{
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
@ -387,7 +412,7 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
CMP(32, gpr.R(rs), gpr.R(rt));
}
CompBranchExits(cc, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, false);
CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo);
}
}
@ -402,9 +427,14 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
@ -446,22 +476,20 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
if (immBranch)
CompBranchExit(immBranchTaken, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, andLink);
CompBranchExit(immBranchTaken, targetAddr, notTakenTarget, branchInfo);
else
{
if (!likely && delaySlotIsNice)
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
CompBranchExits(cc, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, andLink);
CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo);
}
}
@ -518,17 +546,19 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.KillImmediate(MIPS_REG_FPCOND, true, false);
TEST(32, gpr.R(MIPS_REG_FPCOND), Imm32(1));
CompBranchExits(cc, targetAddr, GetCompilerPC() + 8, delaySlotIsNice, likely, false);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo);
}
@ -559,19 +589,16 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_NICE);
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", GetCompilerPC(), (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1);
// THE CONDITION
int imm3 = (op >> 18) & 7;
@ -579,8 +606,8 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
gpr.KillImmediate(MIPS_REG_VFPUCC, true, false);
TEST(32, gpr.R(MIPS_REG_VFPUCC), Imm32(1 << imm3));
u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
CompBranchExits(cc, targetAddr, notTakenTarget, delaySlotIsNice, likely, false);
u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
CompBranchExits(cc, targetAddr, notTakenTarget, branchInfo);
}

View file

@ -230,8 +230,8 @@ private:
}
void CompITypeMemUnpairedLR(MIPSOpcode op, bool isStore);
void CompITypeMemUnpairedLRInner(MIPSOpcode op, Gen::X64Reg shiftReg);
void CompBranchExits(Gen::CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink);
void CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink);
void CompBranchExits(Gen::CCFlags cc, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo);
void CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, const BranchInfo &branchInfo);
static Gen::CCFlags FlipCCFlag(Gen::CCFlags flag);
static Gen::CCFlags SwapCCFlag(Gen::CCFlags flag);

View file

@ -295,6 +295,7 @@ bool CPU_Init(std::string *errorString) {
HLEPlugins::Init();
if (!Memory::Init()) {
// We're screwed.
*errorString = "Memory init failed";
return false;
}
mipsr4k.Reset();

View file

@ -95,7 +95,11 @@ void NativeResized() { }
std::string System_GetProperty(SystemProperty prop) { return ""; }
std::vector<std::string> System_GetPropertyStringVec(SystemProperty prop) { return std::vector<std::string>(); }
int System_GetPropertyInt(SystemProperty prop) { return -1; }
// Stub integer property lookup: only SYSPROP_SYSTEMVERSION has a value here; every other
// property yields -1.
// NOTE(review): 31 is presumably a stand-in system version number - confirm what consumers expect.
int System_GetPropertyInt(SystemProperty prop) {
	return prop == SYSPROP_SYSTEMVERSION ? 31 : -1;
}
float System_GetPropertyFloat(SystemProperty prop) { return -1.0f; }
bool System_GetPropertyBool(SystemProperty prop) {
switch (prop) {