Merge pull request #568 from unknownbrackets/jit-minor

Jit: lwl/lwr/swl/swr, shift var
This commit is contained in:
Henrik Rydgård 2013-02-02 15:46:40 -08:00
commit b7cf57b79c
5 changed files with 78 additions and 38 deletions

View file

@ -297,17 +297,16 @@ namespace MIPSComp
// "over-shifts" work the same as on x86 - only bottom 5 bits are used to get the shift value
void Jit::CompShiftVar(u32 op, void (XEmitter::*shift)(int, OpArg, OpArg))
{
DISABLE;
int rd = _RD;
int rt = _RT;
int rs = _RS;
gpr.FlushLockX(ECX);
gpr.Lock(rd, rt, rs);
gpr.BindToRegister(rd, true, true);
if (rd != rt)
MOV(32, gpr.R(rd), gpr.R(rt));
gpr.BindToRegister(rd, rd == rt || rd == rs, true);
MOV(32, R(ECX), gpr.R(rs)); // Only ECX can be used for variable shifts.
AND(32, R(ECX), Imm32(0x1f));
if (rd != rt)
MOV(32, gpr.R(rd), gpr.R(rt));
(this->*shift)(32, gpr.R(rd), R(ECX));
gpr.UnlockAll();
gpr.UnlockAllX();

View file

@ -136,7 +136,7 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
@ -191,7 +191,7 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
@ -285,7 +285,7 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
int offset = (signed short)(op & 0xFFFF) << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
@ -346,7 +346,7 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
int offset = (signed short)(op & 0xFFFF) << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
@ -443,7 +443,7 @@ void Jit::Comp_JumpReg(u32 op)
}
int rs = _RS;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;

View file

@ -148,44 +148,70 @@ namespace MIPSComp
CompITypeMemWrite(op, 32, (void *) &Memory::Write_U32);
break;
case 134: //lwl
case 34: //lwl
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//R(rt) = ( u32(R(rt)) & (0x00ffffff >> shift) ) | ( mem << (24 - shift) );
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for lwr rd, offset-3(rs) which makes a pair.
u32 desiredOp = ((op + (4 << 26)) & 0xFFFF0000) + (offset - 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// nextOp has the correct address.
CompITypeMemRead(nextOp, 32, &XEmitter::MOVZX, (void *) &Memory::Read_U32);
}
else
Comp_Generic(op);
}
break;
case 138: //lwr
case 38: //lwr
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//R(rt) = ( u32(rt) & (0xffffff00 << (24 - shift)) ) | ( mem >> shift );
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for lwl rd, offset+3(rs) which makes a pair.
u32 desiredOp = ((op - (4 << 26)) & 0xFFFF0000) + (offset + 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// op has the correct address.
CompITypeMemRead(op, 32, &XEmitter::MOVZX, (void *) &Memory::Read_U32);
}
else
Comp_Generic(op);
}
break;
case 142: //swl
case 42: //swl
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) >> (24 - shift) ) ) |
// ( mem & (0xffffff00 << shift) ));
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for swr rd, offset-3(rs) which makes a pair.
u32 desiredOp = ((op + (4 << 26)) & 0xFFFF0000) + (offset - 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// nextOp has the correct address.
CompITypeMemWrite(nextOp, 32, (void *) &Memory::Write_U32);
}
else
Comp_Generic(op);
}
break;
case 146: //swr
case 46: //swr
{
Crash();
// u32 shift = (addr & 3) << 3;
// u32 mem = ReadMem32(addr & 0xfffffffc);
//
// WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) << shift ) |
// (mem & (0x00ffffff >> (24 - shift)) ) ) );
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for swl rd, offset+3(rs) which makes a pair.
u32 desiredOp = ((op - (4 << 26)) & 0xFFFF0000) + (offset + 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// op has the correct address.
CompITypeMemWrite(op, 32, (void *) &Memory::Write_U32);
}
else
Comp_Generic(op);
}
break;
default:
Comp_Generic(op);
return ;

View file

@ -158,6 +158,18 @@ void Jit::CompileAt(u32 addr)
MIPSCompileOp(op);
}
void Jit::EatInstruction(u32 op)
{
u32 info = MIPSGetInfo(op);
_dbg_assert_msg_(JIT, !(info & DELAYSLOT), "Never eat a branch op.");
_dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot.");
CheckJitBreakpoint(js.compilerPC + 4, 0);
js.numInstructions++;
js.compilerPC += 4;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Jit::Compile(u32 em_address)
{
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
@ -203,7 +215,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
gpr.Start(mips_, analysis);
fpr.Start(mips_, analysis);
int numInstructions = 0;
js.numInstructions = 0;
while (js.compiling)
{
// Jit breakpoints are quite fast, so let's do them in release too.
@ -215,13 +227,13 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
MIPSCompileOp(inst);
js.compilerPC += 4;
numInstructions++;
js.numInstructions++;
}
b->codeSize = (u32)(GetCodePtr() - b->normalEntry);
NOP();
AlignCode4();
b->originalSize = numInstructions;
b->originalSize = js.numInstructions;
return b->normalEntry;
}

View file

@ -53,6 +53,7 @@ struct JitState
bool cancel;
bool inDelaySlot;
int downcountAmount;
int numInstructions;
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
JitBlock *curBlock;
@ -107,8 +108,6 @@ public:
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
// See CompileDelaySlotFlags for flags.
void CompileDelaySlot(int flags);
void CompileAt(u32 addr);
void Comp_RunBlock(u32 op);
@ -151,6 +150,10 @@ private:
void FlushAll();
void WriteDowncount(int offset = 0);
// See CompileDelaySlotFlags for flags.
void CompileDelaySlot(int flags);
void EatInstruction(u32 op);
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInEAX();
// void WriteRfiExitDestInEAX();