mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #3189 from unknownbrackets/jit-branch
Attempts at continuing jit blocks (disabled)
This commit is contained in:
commit
e36444c1c2
12 changed files with 288 additions and 86 deletions
|
@ -39,6 +39,8 @@
|
|||
#define _FD ((op>>6 ) & 0x1F)
|
||||
#define _POS ((op>>6 ) & 0x1F)
|
||||
#define _SIZE ((op>>11 ) & 0x1F)
|
||||
#define _IMM16 (signed short)(op&0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
#define LOOPOPTIMIZATION 0
|
||||
|
||||
|
@ -57,7 +59,7 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op&0xFFFF)<<2;
|
||||
int offset = _IMM16 << 2;
|
||||
int rt = _RT;
|
||||
int rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
@ -101,11 +103,11 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
}
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC+8, 1);
|
||||
WriteExit(js.compilerPC+8, js.nextExit++);
|
||||
|
||||
js.compiling = false;
|
||||
}
|
||||
|
@ -117,7 +119,7 @@ void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool andLink, bool likely
|
|||
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op&0xFFFF)<<2;
|
||||
int offset = _IMM16 << 2;
|
||||
int rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
|
@ -153,11 +155,11 @@ void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool andLink, bool likely
|
|||
STR(R0, CTXREG, MIPS_REG_RA * 4);
|
||||
}
|
||||
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
@ -183,7 +185,6 @@ void Jit::Comp_RelBranch(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Jit::Comp_RelBranchRI(u32 op)
|
||||
|
@ -202,7 +203,6 @@ void Jit::Comp_RelBranchRI(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
|
@ -212,7 +212,7 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op & 0xFFFF) << 2;
|
||||
int offset = _IMM16 << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
|
||||
|
@ -221,8 +221,6 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
FlushAll();
|
||||
|
||||
LDR(R0, CTXREG, offsetof(MIPSState, fpcond));
|
||||
TST(R0, Operand2(1, TYPE_IMM));
|
||||
|
||||
|
@ -231,20 +229,23 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
ptr = B_CC(cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
FlushAll();
|
||||
ptr = B_CC(cc);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
@ -260,7 +261,6 @@ void Jit::Comp_FPUBranch(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
|
@ -270,7 +270,7 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op & 0xFFFF) << 2;
|
||||
int offset = _IMM16 << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
|
||||
|
@ -286,8 +286,6 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
if (delaySlotIsBranch && (delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
|
||||
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC);
|
||||
|
||||
FlushAll();
|
||||
|
||||
int imm3 = (op >> 18) & 7;
|
||||
|
||||
MOVI2R(R0, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
|
||||
|
@ -300,10 +298,13 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
{
|
||||
if (!delaySlotIsNice && !delaySlotIsBranch)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
ptr = B_CC(cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
FlushAll();
|
||||
ptr = B_CC(cc);
|
||||
if (!delaySlotIsBranch)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
|
@ -311,12 +312,12 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
|
|||
js.inDelaySlot = false;
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
|
||||
WriteExit(notTakenTarget, 1);
|
||||
WriteExit(notTakenTarget, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
@ -329,7 +330,6 @@ void Jit::Comp_VBranch(u32 op)
|
|||
case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl
|
||||
case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Jit::Comp_Jump(u32 op)
|
||||
|
@ -338,7 +338,7 @@ void Jit::Comp_Jump(u32 op)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
u32 off = ((op & 0x03FFFFFF) << 2);
|
||||
u32 off = _IMM26 << 2;
|
||||
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
|
||||
|
||||
switch (op >> 26)
|
||||
|
@ -346,7 +346,7 @@ void Jit::Comp_Jump(u32 op)
|
|||
case 2: //j
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
FlushAll();
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
case 3: //jal
|
||||
|
@ -354,7 +354,7 @@ void Jit::Comp_Jump(u32 op)
|
|||
MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
FlushAll();
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -193,6 +193,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
|||
{
|
||||
js.cancel = false;
|
||||
js.blockStart = js.compilerPC = mips_->pc;
|
||||
js.nextExit = 0;
|
||||
js.downcountAmount = 0;
|
||||
js.curBlock = b;
|
||||
js.compiling = true;
|
||||
|
|
|
@ -55,6 +55,7 @@ struct ArmJitState
|
|||
|
||||
u32 compilerPC;
|
||||
u32 blockStart;
|
||||
int nextExit;
|
||||
bool cancel;
|
||||
bool inDelaySlot;
|
||||
int downcountAmount;
|
||||
|
|
|
@ -137,12 +137,12 @@ int JitBlockCache::AllocateBlock(u32 em_address)
|
|||
JitBlock &b = blocks[num_blocks];
|
||||
b.invalid = false;
|
||||
b.originalAddress = em_address;
|
||||
b.exitAddress[0] = INVALID_EXIT;
|
||||
b.exitAddress[1] = INVALID_EXIT;
|
||||
b.exitPtrs[0] = 0;
|
||||
b.exitPtrs[1] = 0;
|
||||
b.linkStatus[0] = false;
|
||||
b.linkStatus[1] = false;
|
||||
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; ++i)
|
||||
{
|
||||
b.exitAddress[i] = INVALID_EXIT;
|
||||
b.exitPtrs[i] = 0;
|
||||
b.linkStatus[i] = false;
|
||||
}
|
||||
b.blockNum = num_blocks;
|
||||
num_blocks++; //commit the current block
|
||||
return num_blocks - 1;
|
||||
|
@ -163,7 +163,7 @@ void JitBlockCache::FinalizeBlock(int block_num, bool block_link)
|
|||
block_map[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num;
|
||||
if (block_link)
|
||||
{
|
||||
for (int i = 0; i < 2; i++)
|
||||
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; i++)
|
||||
{
|
||||
if (b.exitAddress[i] != INVALID_EXIT)
|
||||
links_to.insert(std::pair<u32, int>(b.exitAddress[i], block_num));
|
||||
|
@ -228,7 +228,7 @@ u32 JitBlockCache::GetEmuHackOpForBlock(int blockNum) const {
|
|||
int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr)
|
||||
{
|
||||
if (!blocks)
|
||||
return -1;
|
||||
return -1;
|
||||
u32 inst = Memory::Read_U32(addr);
|
||||
int bl = GetBlockNumberFromEmuHackOp(inst);
|
||||
if (bl < 0)
|
||||
|
@ -262,7 +262,7 @@ void JitBlockCache::LinkBlockExits(int i)
|
|||
// This block is dead. Don't relink it.
|
||||
return;
|
||||
}
|
||||
for (int e = 0; e < 2; e++) {
|
||||
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) {
|
||||
if (b.exitAddress[e] != INVALID_EXIT && !b.linkStatus[e]) {
|
||||
int destinationBlock = GetBlockNumberFromStartAddress(b.exitAddress[e]);
|
||||
if (destinationBlock != -1) {
|
||||
|
@ -308,7 +308,7 @@ void JitBlockCache::UnlinkBlock(int i)
|
|||
return;
|
||||
for (multimap<u32, int>::iterator iter = ppp.first; iter != ppp.second; ++iter) {
|
||||
JitBlock &sourceBlock = blocks[iter->second];
|
||||
for (int e = 0; e < 2; e++)
|
||||
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++)
|
||||
{
|
||||
if (sourceBlock.exitAddress[e] == b.originalAddress)
|
||||
sourceBlock.linkStatus[e] = false;
|
||||
|
|
|
@ -40,6 +40,12 @@ typedef Gen::XCodeBlock CodeBlock;
|
|||
#error "Unsupported arch!"
|
||||
#endif
|
||||
|
||||
#if defined(ARM)
|
||||
const int MAX_JIT_BLOCK_EXITS = 2;
|
||||
#else
|
||||
const int MAX_JIT_BLOCK_EXITS = 8;
|
||||
#endif
|
||||
|
||||
// Define this in order to get VTune profile support for the Jit generated code.
|
||||
// Add the VTune include/lib directories to the project directories to get this to build.
|
||||
// #define USE_VTUNE
|
||||
|
@ -50,8 +56,8 @@ struct JitBlock {
|
|||
const u8 *checkedEntry;
|
||||
const u8 *normalEntry;
|
||||
|
||||
u8 *exitPtrs[2]; // to be able to rewrite the exit jump
|
||||
u32 exitAddress[2]; // 0xFFFFFFFF == unknown
|
||||
u8 *exitPtrs[MAX_JIT_BLOCK_EXITS]; // to be able to rewrite the exit jump
|
||||
u32 exitAddress[MAX_JIT_BLOCK_EXITS]; // 0xFFFFFFFF == unknown
|
||||
|
||||
u32 originalAddress;
|
||||
u32 originalFirstOpcode; //to be able to restore
|
||||
|
@ -60,7 +66,7 @@ struct JitBlock {
|
|||
u16 blockNum;
|
||||
|
||||
bool invalid;
|
||||
bool linkStatus[2];
|
||||
bool linkStatus[MAX_JIT_BLOCK_EXITS];
|
||||
|
||||
#ifdef USE_VTUNE
|
||||
char blockName[32];
|
||||
|
|
|
@ -36,8 +36,10 @@
|
|||
#define _FS ((op>>11) & 0x1F)
|
||||
#define _FT ((op>>16) & 0x1F)
|
||||
#define _FD ((op>>6 ) & 0x1F)
|
||||
#define _POS ((op>>6 ) & 0x1F)
|
||||
#define _POS ((op>>6 ) & 0x1F)
|
||||
#define _SIZE ((op>>11 ) & 0x1F)
|
||||
#define _IMM16 (signed short)(op&0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
#define LOOPOPTIMIZATION 0
|
||||
|
||||
|
@ -133,7 +135,7 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op&0xFFFF)<<2;
|
||||
int offset = _IMM16 << 2;
|
||||
int rt = _RT;
|
||||
int rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
@ -141,6 +143,36 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
|
|||
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
{
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool skipBranch;
|
||||
s32 rsImm = (s32)gpr.GetImmediate32(rs);
|
||||
s32 rtImm = (s32)gpr.GetImmediate32(rt);
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
case CC_E: skipBranch = rsImm == rtImm; break;
|
||||
case CC_NE: skipBranch = rsImm != rtImm; break;
|
||||
default: _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
|
||||
}
|
||||
|
||||
if (skipBranch)
|
||||
{
|
||||
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||
if (likely)
|
||||
js.compilerPC += 4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
|
@ -156,30 +188,40 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
|
|||
}
|
||||
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
|
||||
else
|
||||
FlushAll();
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
FlushAll();
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
// Take the branch
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
|
||||
js.compiling = false;
|
||||
if (CanContinueBranch())
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
RestoreState(state);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
|
||||
|
@ -189,13 +231,49 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op&0xFFFF)<<2;
|
||||
int offset = _IMM16 << 2;
|
||||
int rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
if (jo.immBranches && gpr.IsImmediate(rs))
|
||||
{
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool skipBranch;
|
||||
s32 imm = (s32)gpr.GetImmediate32(rs);
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
case CC_G: skipBranch = imm > 0; break;
|
||||
case CC_GE: skipBranch = imm >= 0; break;
|
||||
case CC_L: skipBranch = imm < 0; break;
|
||||
case CC_LE: skipBranch = imm <= 0; break;
|
||||
default: _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
|
||||
}
|
||||
|
||||
if (skipBranch)
|
||||
{
|
||||
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||
if (likely)
|
||||
js.compilerPC += 4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (andLink)
|
||||
{
|
||||
gpr.BindToRegister(MIPS_REG_RA, false, true);
|
||||
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8));
|
||||
}
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
|
@ -203,17 +281,18 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
|
|||
CMP(32, gpr.R(rs), Imm32(0));
|
||||
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
|
||||
else
|
||||
FlushAll();
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
FlushAll();
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
@ -222,14 +301,23 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
|
|||
if (andLink)
|
||||
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
|
||||
js.compiling = false;
|
||||
if (CanContinueBranch())
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
RestoreState(state);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -253,7 +341,6 @@ void Jit::Comp_RelBranch(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Jit::Comp_RelBranchRI(u32 op)
|
||||
|
@ -272,7 +359,6 @@ void Jit::Comp_RelBranchRI(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -284,7 +370,7 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op & 0xFFFF) << 2;
|
||||
int offset = _IMM16 << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
|
@ -293,32 +379,43 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
|
|||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
FlushAll();
|
||||
|
||||
TEST(32, M((void *)&(mips_->fpcond)), Imm32(1));
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
|
||||
else
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
|
||||
js.compiling = false;
|
||||
if (CanContinueBranch())
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
RestoreState(state);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -334,7 +431,6 @@ void Jit::Comp_FPUBranch(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
|
@ -345,7 +441,7 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op & 0xFFFF) << 2;
|
||||
int offset = _IMM16 << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
|
@ -361,22 +457,24 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
|
|||
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
|
||||
ERROR_LOG(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", js.compilerPC, (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1);
|
||||
|
||||
FlushAll();
|
||||
|
||||
// THE CONDITION
|
||||
int imm3 = (op >> 18) & 7;
|
||||
|
||||
//int val = (mips_->vfpuCtrl[VFPU_CTRL_CC] >> imm3) & 1;
|
||||
TEST(32, M((void *)&(mips_->vfpuCtrl[VFPU_CTRL_CC])), Imm32(1 << imm3));
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice && !delaySlotIsBranch)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
|
||||
else
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
GetStateAndFlushAll(state);
|
||||
ptr = J_CC(cc, true);
|
||||
if (!delaySlotIsBranch)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
|
@ -384,15 +482,24 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
|
|||
|
||||
// Take the branch
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
|
||||
CONDITIONAL_LOG_EXIT(notTakenTarget);
|
||||
WriteExit(notTakenTarget, 1);
|
||||
|
||||
js.compiling = false;
|
||||
if (CanContinueBranch() && !delaySlotIsBranch)
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
RestoreState(state);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteExit(notTakenTarget, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -408,7 +515,6 @@ void Jit::Comp_VBranch(u32 op)
|
|||
_dbg_assert_msg_(CPU,0,"Comp_VBranch: Invalid instruction");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Jit::Comp_Jump(u32 op)
|
||||
|
@ -418,7 +524,7 @@ void Jit::Comp_Jump(u32 op)
|
|||
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
u32 off = ((op & 0x3FFFFFF) << 2);
|
||||
u32 off = _IMM26 << 2;
|
||||
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
|
||||
|
||||
switch (op >> 26)
|
||||
|
@ -427,7 +533,7 @@ void Jit::Comp_Jump(u32 op)
|
|||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
FlushAll();
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
case 3: //jal
|
||||
|
@ -436,7 +542,7 @@ void Jit::Comp_Jump(u32 op)
|
|||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
FlushAll();
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, 0);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -134,6 +134,20 @@ void Jit::DoDummyState(PointerWrap &p)
|
|||
p.DoMarker("Jit");
|
||||
}
|
||||
|
||||
|
||||
// Captures the current GPR and FPU register cache state into `state`, and
// only afterwards flushes everything.  The snapshot must be taken before the
// flush so that a later RestoreState() can bring the caches back to how they
// looked at this point.
void Jit::GetStateAndFlushAll(RegCacheState &state)
{
	// The two caches are snapshotted independently of one another.
	fpr.GetState(state.fpr);
	gpr.GetState(state.gpr);

	FlushAll();
}
|
||||
|
||||
void Jit::RestoreState(const RegCacheState state)
|
||||
{
|
||||
gpr.RestoreState(state.gpr);
|
||||
fpr.RestoreState(state.fpr);
|
||||
}
|
||||
|
||||
void Jit::FlushAll()
|
||||
{
|
||||
gpr.Flush();
|
||||
|
@ -180,7 +194,7 @@ void Jit::ClearCacheAt(u32 em_address)
|
|||
ClearCache();
|
||||
}
|
||||
|
||||
void Jit::CompileDelaySlot(int flags)
|
||||
void Jit::CompileDelaySlot(int flags, RegCacheState *state)
|
||||
{
|
||||
const u32 addr = js.compilerPC + 4;
|
||||
|
||||
|
@ -196,7 +210,12 @@ void Jit::CompileDelaySlot(int flags)
|
|||
js.inDelaySlot = false;
|
||||
|
||||
if (flags & DELAYSLOT_FLUSH)
|
||||
FlushAll();
|
||||
{
|
||||
if (state != NULL)
|
||||
GetStateAndFlushAll(*state);
|
||||
else
|
||||
FlushAll();
|
||||
}
|
||||
if (flags & DELAYSLOT_SAFE)
|
||||
LOAD_FLAGS; // restore flag!
|
||||
}
|
||||
|
@ -255,6 +274,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
|||
{
|
||||
js.cancel = false;
|
||||
js.blockStart = js.compilerPC = mips_->pc;
|
||||
js.nextExit = 0;
|
||||
js.downcountAmount = 0;
|
||||
js.curBlock = b;
|
||||
js.compiling = true;
|
||||
|
@ -352,6 +372,8 @@ void Jit::Comp_Generic(u32 op)
|
|||
|
||||
void Jit::WriteExit(u32 destination, int exit_num)
|
||||
{
|
||||
_dbg_assert_msg_(JIT, exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num");
|
||||
|
||||
if (!Memory::IsValidAddress(destination)) {
|
||||
ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc);
|
||||
}
|
||||
|
|
|
@ -41,9 +41,17 @@ struct JitOptions
|
|||
JitOptions()
|
||||
{
|
||||
enableBlocklink = true;
|
||||
// Seems to hurt performance?
|
||||
immBranches = false;
|
||||
// Seems to hurt performance also?
|
||||
continueBranches = false;
|
||||
continueMaxInstructions = 100;
|
||||
}
|
||||
|
||||
bool enableBlocklink;
|
||||
bool immBranches;
|
||||
bool continueBranches;
|
||||
int continueMaxInstructions;
|
||||
};
|
||||
|
||||
struct JitState
|
||||
|
@ -65,6 +73,7 @@ struct JitState
|
|||
|
||||
u32 compilerPC;
|
||||
u32 blockStart;
|
||||
int nextExit;
|
||||
bool cancel;
|
||||
bool inDelaySlot;
|
||||
// See JitState::AfterOp for values.
|
||||
|
@ -151,6 +160,12 @@ enum CompileDelaySlotFlags
|
|||
DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE,
|
||||
};
|
||||
|
||||
// TODO: Hmm, humongous.
|
||||
struct RegCacheState {
|
||||
GPRRegCacheState gpr;
|
||||
FPURegCacheState fpr;
|
||||
};
|
||||
|
||||
class Jit : public Gen::XCodeBlock
|
||||
{
|
||||
public:
|
||||
|
@ -255,12 +270,17 @@ public:
|
|||
void ClearCache();
|
||||
void ClearCacheAt(u32 em_address);
|
||||
private:
|
||||
void GetStateAndFlushAll(RegCacheState &state);
|
||||
void RestoreState(const RegCacheState state);
|
||||
void FlushAll();
|
||||
void FlushPrefixV();
|
||||
void WriteDowncount(int offset = 0);
|
||||
|
||||
// See CompileDelaySlotFlags for flags.
|
||||
void CompileDelaySlot(int flags);
|
||||
void CompileDelaySlot(int flags, RegCacheState *state = NULL);
|
||||
void CompileDelaySlot(int flags, RegCacheState &state) {
|
||||
CompileDelaySlot(flags, &state);
|
||||
}
|
||||
void EatInstruction(u32 op);
|
||||
|
||||
void WriteExit(u32 destination, int exit_num);
|
||||
|
@ -295,6 +315,17 @@ private:
|
|||
void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
|
||||
void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
|
||||
|
||||
bool CanContinueBranch() {
|
||||
if (!jo.continueBranches || js.numInstructions >= jo.continueMaxInstructions) {
|
||||
return false;
|
||||
}
|
||||
// Need at least 2 exits left over.
|
||||
if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 1) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
JitBlockCache blocks;
|
||||
JitOptions jo;
|
||||
JitState js;
|
||||
|
|
|
@ -282,8 +282,7 @@ void GPRRegCache::StoreFromRegister(int i) {
|
|||
}
|
||||
}
|
||||
|
||||
void GPRRegCache::Flush()
|
||||
{
|
||||
void GPRRegCache::Flush() {
|
||||
for (int i = 0; i < NUM_X_REGS; i++) {
|
||||
if (xregs[i].allocLocked)
|
||||
PanicAlert("Someone forgot to unlock X64 reg %i.", i);
|
||||
|
@ -305,4 +304,14 @@ void GPRRegCache::Flush()
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copies this cache's `regs` and `xregs` tables into `state`.
// The copy sizes are taken from the cache's own arrays.
void GPRRegCache::GetState(GPRRegCacheState &state) const {
	memcpy(state.xregs, xregs, sizeof(xregs));
	memcpy(state.regs, regs, sizeof(regs));
}
|
||||
|
||||
// Overwrites this cache's `regs` and `xregs` tables with the contents of a
// snapshot previously filled in by GetState().
void GPRRegCache::RestoreState(const GPRRegCacheState state) {
	memcpy(xregs, state.xregs, sizeof(xregs));
	memcpy(regs, state.regs, sizeof(regs));
}
|
||||
|
|
|
@ -22,6 +22,15 @@
|
|||
|
||||
using namespace Gen;
|
||||
|
||||
// Size of the host register table (used for X64CachedReg xregs[NUM_X_REGS]).
// Both branches test whether the architecture macro is *defined*; a bare
// `#elif _M_IX86` would instead evaluate the macro's value, which fails to
// preprocess if the macro is defined empty and warns under -Wundef otherwise.
#if defined(_M_X64)
#define NUM_X_REGS 16
#elif defined(_M_IX86)
#define NUM_X_REGS 8
#endif

// Size of the MIPS GPR table (used for MIPSCachedReg regs[NUM_MIPS_GPRS]).
// TODO: Add more cachable regs, like HI, LO
#define NUM_MIPS_GPRS 32
|
||||
|
||||
struct MIPSCachedReg {
|
||||
OpArg location;
|
||||
bool away; // value not in source register
|
||||
|
@ -35,14 +44,10 @@ struct X64CachedReg {
|
|||
bool allocLocked;
|
||||
};
|
||||
|
||||
#ifdef _M_X64
|
||||
#define NUM_X_REGS 16
|
||||
#elif _M_IX86
|
||||
#define NUM_X_REGS 8
|
||||
#endif
|
||||
|
||||
// TODO: Add more cachable regs, like HI, LO
|
||||
#define NUM_MIPS_GPRS 32
|
||||
struct GPRRegCacheState {
|
||||
MIPSCachedReg regs[NUM_MIPS_GPRS];
|
||||
X64CachedReg xregs[NUM_X_REGS];
|
||||
};
|
||||
|
||||
class GPRRegCache
|
||||
{
|
||||
|
@ -91,6 +96,9 @@ public:
|
|||
bool IsImmediate(int preg) const;
|
||||
u32 GetImmediate32(int preg) const;
|
||||
|
||||
void GetState(GPRRegCacheState &state) const;
|
||||
void RestoreState(const GPRRegCacheState state);
|
||||
|
||||
MIPSState *mips;
|
||||
|
||||
private:
|
||||
|
|
|
@ -255,3 +255,13 @@ void FPURegCache::FlushX(X64Reg reg) {
|
|||
StoreFromRegister(xregs[reg].mipsReg);
|
||||
}
|
||||
}
|
||||
|
||||
// Copies this cache's `regs` and `xregs` tables into `state`.
// The copy sizes are taken from the cache's own arrays.
void FPURegCache::GetState(FPURegCacheState &state) const {
	memcpy(state.xregs, xregs, sizeof(xregs));
	memcpy(state.regs, regs, sizeof(regs));
}
|
||||
|
||||
// Overwrites this cache's `regs` and `xregs` tables with the contents of a
// snapshot previously filled in by GetState().
void FPURegCache::RestoreState(const FPURegCacheState state) {
	memcpy(xregs, state.xregs, sizeof(xregs));
	memcpy(regs, state.regs, sizeof(regs));
}
|
||||
|
|
|
@ -57,6 +57,11 @@ struct MIPSCachedFPReg {
|
|||
bool tempLocked;
|
||||
};
|
||||
|
||||
struct FPURegCacheState {
|
||||
MIPSCachedFPReg regs[NUM_MIPS_FPRS];
|
||||
X64CachedFPReg xregs[NUM_X_FPREGS];
|
||||
};
|
||||
|
||||
enum {
|
||||
MAP_DIRTY = 1,
|
||||
MAP_NOINIT = 2,
|
||||
|
@ -129,6 +134,9 @@ public:
|
|||
ReleaseSpillLock(vreg + 32);
|
||||
}
|
||||
|
||||
void GetState(FPURegCacheState &state) const;
|
||||
void RestoreState(const FPURegCacheState state);
|
||||
|
||||
MIPSState *mips;
|
||||
|
||||
private:
|
||||
|
|
Loading…
Add table
Reference in a new issue