Merge pull request #4503 from unknownbrackets/armjit-imms

Retain immediates in the armjit, optimize similar imms
commit a1196399ef
Henrik Rydgård, 2013-11-10 17:22:20 -08:00
17 changed files with 329 additions and 129 deletions

View file

@ -198,7 +198,7 @@ void ARMXEmitter::ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
}
// The worst case is 4 (e.g. 0x55555555.)
if (ops <= 3) {
if (ops <= 3 || !cpu_info.bArmV7) {
bool first = true;
for (int i = 0; i < 32; i += 2) {
u8 bits = (val >> i) & 0xFF;
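For reference, this chunked fallback leans on ARM's Operand2 immediate encoding: an 8-bit value rotated right by an even amount, with a 4-bit field holding half the rotate count. A minimal standalone sketch of the chunk-counting logic (the helper name is made up for illustration; it is not part of this commit):

#include <cstdint>

// Counts how many chunked ORR/BIC-style ops the loop above would emit for val.
int CountOperand2Chunks(uint32_t val) {
	int ops = 0;
	for (int i = 0; i < 32; i += 2) {
		uint8_t bits = (val >> i) & 0xFF;
		if ((bits & 3) != 0) {
			// Placing this 8-bit chunk at bit i needs a rotate-right of (32 - i);
			// the 4-bit Operand2 field stores half of that, hence the 16 - i / 2
			// rotation computed in these helpers.
			++ops;
			i += 8 - 2;  // this op covers the next 8 bits (the loop adds the other 2).
		}
	}
	return ops;
}

// CountOperand2Chunks(0x55555555) == 4: chunks of 0x55 at i = 0, 8, 16, 24 --
// exactly the worst case the comment above mentions.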
@ -253,8 +253,41 @@ void ARMXEmitter::ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
if (TryMakeOperand2(val, op2)) {
ORR(rd, rs, op2);
} else {
MOVI2R(scratch, val);
ORR(rd, rs, scratch);
int ops = 0;
for (int i = 0; i < 32; i += 2) {
u8 bits = (val >> i) & 0xFF;
// If either of the two lowest bits is set, we need an ORR for this chunk.
if ((bits & 3) != 0) {
++ops;
i += 8 - 2;
}
}
// The worst case is 4 ops (e.g. 0x55555555), but MVN can sometimes get it down to 2. Not sure which is better.
bool inversed;
if (TryMakeOperand2_AllowInverse(val, op2, &inversed) && ops >= 3) {
MVN(scratch, op2);
ORR(rd, rs, scratch);
} else if (ops <= 3 || !cpu_info.bArmV7) {
bool first = true;
for (int i = 0; i < 32; i += 2) {
u8 bits = (val >> i) & 0xFF;
if ((bits & 3) != 0) {
u8 rotation = i == 0 ? 0 : 16 - i / 2;
if (first) {
ORR(rd, rs, Operand2(bits, rotation));
first = false;
} else {
ORR(rd, rd, Operand2(bits, rotation));
}
// Well, we took care of these other bits while we were at it.
i += 8 - 2;
}
}
} else {
MOVI2R(scratch, val);
ORR(rd, rs, scratch);
}
}
}
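To make the heuristic concrete, a worked example with an assumed value: for val = 0xFFFFFDFF, the chunked loop would emit 4 ORRs (chunks at i = 0, 8, 16, 24), but ~val = 0x00000200 fits a single Operand2, so the inverse path is only two instructions:

MVN(scratch, op2);    // op2 encodes 0x200, so scratch = ~0x200 = 0xFFFFFDFF
ORR(rd, rs, scratch);

When ops <= 2 the chunked route is at least as short (and needs no scratch register), which is why the MVN path is gated on ops >= 3.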

View file

@ -61,7 +61,7 @@ namespace MIPSComp
if (TryMakeOperand2(uimm, op2)) {
(this->*arith)(gpr.R(rt), gpr.R(rs), op2);
} else {
MOVI2R(R0, (u32)uimm);
gpr.SetRegImm(R0, (u32)uimm);
(this->*arith)(gpr.R(rt), gpr.R(rs), R0);
}
}
@ -450,14 +450,32 @@ namespace MIPSComp
}
}
void Jit::CompShiftImm(MIPSOpcode op, ArmGen::ShiftType shiftType)
void Jit::CompShiftImm(MIPSOpcode op, ArmGen::ShiftType shiftType, int sa)
{
MIPSGPReg rd = _RD;
MIPSGPReg rt = _RT;
int sa = _SA;
gpr.MapDirtyIn(rd, rt);
MOV(gpr.R(rd), Operand2(gpr.R(rt), shiftType, sa));
if (gpr.IsImm(rt)) {
switch (shiftType) {
case ST_LSL:
gpr.SetImm(rd, gpr.GetImm(rt) << sa);
break;
case ST_LSR:
gpr.SetImm(rd, gpr.GetImm(rt) >> sa);
break;
case ST_ASR:
gpr.SetImm(rd, (int)gpr.GetImm(rt) >> sa);
break;
case ST_ROR:
// rotr by 0 is the identity; guard it, since << (32 - 0) would be undefined in C++.
gpr.SetImm(rd, sa == 0 ? gpr.GetImm(rt) : (gpr.GetImm(rt) >> sa) | (gpr.GetImm(rt) << (32 - sa)));
break;
default:
DISABLE;
}
} else {
gpr.MapDirtyIn(rd, rt);
MOV(gpr.R(rd), Operand2(gpr.R(rt), shiftType, sa));
}
}
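To make the constant folding concrete, a worked ST_ROR example with assumed values (and the sa == 0 guard noted above):

// Assume gpr.GetImm(rt) == 0x80000001 and sa == 4:
//   0x80000001 >> 4        == 0x08000000
//   0x80000001 << (32 - 4) == 0x10000000
//   rotr result            == 0x18000000
// rd becomes a known immediate, and no MOV instruction is emitted at all.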
void Jit::CompShiftVar(MIPSOpcode op, ArmGen::ShiftType shiftType)
@ -465,11 +483,9 @@ namespace MIPSComp
MIPSGPReg rd = _RD;
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
if (gpr.IsImm(rs))
{
if (gpr.IsImm(rs)) {
int sa = gpr.GetImm(rs) & 0x1F;
gpr.MapDirtyIn(rd, rt);
MOV(gpr.R(rd), Operand2(gpr.R(rt), shiftType, sa));
CompShiftImm(op, shiftType, sa);
return;
}
gpr.MapDirtyInIn(rd, rs, rt);
@ -483,17 +499,17 @@ namespace MIPSComp
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
int fd = _FD;
int sa = _SA;
// noop, won't write to ZERO.
if (rd == 0)
return;
// WARNING: ROTR
switch (op & 0x3f)
{
case 0: CompShiftImm(op, ST_LSL); break; //sll
case 2: CompShiftImm(op, rs == 1 ? ST_ROR : ST_LSR); break; //srl
case 3: CompShiftImm(op, ST_ASR); break; //sra
switch (op & 0x3f) {
case 0: CompShiftImm(op, ST_LSL, sa); break; //sll
case 2: CompShiftImm(op, rs == 1 ? ST_ROR : ST_LSR, sa); break; //srl
case 3: CompShiftImm(op, ST_ASR, sa); break; //sra
case 4: CompShiftVar(op, ST_LSL); break; //sllv
case 6: CompShiftVar(op, fd == 1 ? ST_ROR : ST_LSR); break; //srlv
case 7: CompShiftVar(op, ST_ASR); break; //srav
@ -519,11 +535,9 @@ namespace MIPSComp
if (rt == 0)
return;
switch (op & 0x3f)
{
switch (op & 0x3f) {
case 0x0: //ext
if (gpr.IsImm(rs))
{
if (gpr.IsImm(rs)) {
gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);
return;
}
@ -541,11 +555,9 @@ namespace MIPSComp
{
u32 sourcemask = mask >> pos;
u32 destmask = ~(sourcemask << pos);
if (gpr.IsImm(rs))
{
if (gpr.IsImm(rs)) {
u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;
if (gpr.IsImm(rt))
{
if (gpr.IsImm(rt)) {
gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);
return;
}
@ -553,9 +565,7 @@ namespace MIPSComp
gpr.MapReg(rt, MAP_DIRTY);
ANDI2R(gpr.R(rt), gpr.R(rt), destmask, R0);
ORI2R(gpr.R(rt), gpr.R(rt), inserted, R0);
}
else
{
} else {
gpr.MapDirtyIn(rt, rs, false);
if (cpu_info.bArmV7) {
BFI(gpr.R(rt), gpr.R(rs), pos, size-pos);
@ -579,11 +589,9 @@ namespace MIPSComp
if (rd == 0)
return;
switch ((op >> 6) & 31)
{
switch ((op >> 6) & 31) {
case 16: // seb // R(rd) = (u32)(s32)(s8)(u8)R(rt);
if (gpr.IsImm(rt))
{
if (gpr.IsImm(rt)) {
gpr.SetImm(rd, (s32)(s8)(u8)gpr.GetImm(rt));
return;
}
@ -592,8 +600,7 @@ namespace MIPSComp
break;
case 24: // seh
if (gpr.IsImm(rt))
{
if (gpr.IsImm(rt)) {
gpr.SetImm(rd, (s32)(s16)(u16)gpr.GetImm(rt));
return;
}
@ -602,8 +609,7 @@ namespace MIPSComp
break;
case 20: //bitrev
if (gpr.IsImm(rt))
{
if (gpr.IsImm(rt)) {
// http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
u32 v = gpr.GetImm(rt);
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); // odd<->even
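The hunk is cut off right after this first swap; for reference, the standard remainder of the linked bithacks parallel reversal (presumably what the elided lines contain) is:

v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); // pairs
v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); // nibbles
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); // bytes
v = (v >> 16) | (v << 16);                             // halfwords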
@ -637,15 +643,22 @@ namespace MIPSComp
if (rd == 0)
return;
switch (op & 0x3ff)
{
switch (op & 0x3ff) {
case 0xA0: //wsbh
if (gpr.IsImm(rt)) {
gpr.SetImm(rd, ((gpr.GetImm(rt) & 0xFF00FF00) >> 8) | ((gpr.GetImm(rt) & 0x00FF00FF) << 8));
} else {
gpr.MapDirtyIn(rd, rt);
REV16(gpr.R(rd), gpr.R(rt));
}
break;
case 0xE0: //wsbw
if (gpr.IsImm(rt)) {
gpr.SetImm(rd, swap32(gpr.GetImm(rt)));
} else {
gpr.MapDirtyIn(rd, rt);
REV(gpr.R(rd), gpr.R(rt));
}
break;
default:
Comp_Generic(op);
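For reference, the semantics being folded here: wsbh swaps the bytes within each 16-bit halfword, wsbw reverses all four bytes. A worked example with an assumed input:

// With gpr.GetImm(rt) == 0xAABBCCDD:
//   wsbh: ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8) == 0xBBAADDCC
//   wsbw: swap32(x)                                         == 0xDDCCBBAA
// which matches what REV16 and REV compute on ARM in the non-imm path.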
@ -660,41 +673,68 @@ namespace MIPSComp
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
switch (op & 63)
{
switch (op & 63) {
case 16: // R(rd) = HI; //mfhi
if (gpr.IsImm(MIPS_REG_HI)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_HI));
break;
}
gpr.MapDirtyIn(rd, MIPS_REG_HI);
MOV(gpr.R(rd), gpr.R(MIPS_REG_HI));
break;
case 17: // HI = R(rs); //mthi
if (gpr.IsImm(rs)) {
gpr.SetImm(MIPS_REG_HI, gpr.GetImm(rs));
break;
}
gpr.MapDirtyIn(MIPS_REG_HI, rs);
MOV(gpr.R(MIPS_REG_HI), gpr.R(rs));
break;
case 18: // R(rd) = LO; break; //mflo
if (gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO));
break;
}
gpr.MapDirtyIn(rd, MIPS_REG_LO);
MOV(gpr.R(rd), gpr.R(MIPS_REG_LO));
break;
case 19: // LO = R(rs); break; //mtlo
if (gpr.IsImm(rs)) {
gpr.SetImm(MIPS_REG_LO, gpr.GetImm(rs));
break;
}
gpr.MapDirtyIn(MIPS_REG_LO, rs);
MOV(gpr.R(MIPS_REG_LO), gpr.R(rs));
break;
case 24: //mult (the most popular one). lo,hi = signed mul (rs * rt)
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
s64 result = (s64)(s32)gpr.GetImm(rs) * (s64)(s32)gpr.GetImm(rt);
u64 resultBits = (u64)result;
gpr.SetImm(MIPS_REG_LO, (u32)(resultBits >> 0));
gpr.SetImm(MIPS_REG_HI, (u32)(resultBits >> 32));
break;
}
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
SMULL(gpr.R(MIPS_REG_LO), gpr.R(MIPS_REG_HI), gpr.R(rs), gpr.R(rt));
break;
case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt)
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
u64 resultBits = (u64)gpr.GetImm(rs) * (u64)gpr.GetImm(rt);
gpr.SetImm(MIPS_REG_LO, (u32)(resultBits >> 0));
gpr.SetImm(MIPS_REG_HI, (u32)(resultBits >> 32));
break;
}
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
UMULL(gpr.R(MIPS_REG_LO), gpr.R(MIPS_REG_HI), gpr.R(rs), gpr.R(rt));
break;
case 26: //div
if (cpu_info.bIDIVa)
{
if (cpu_info.bIDIVa) {
// TODO: Does this handle INT_MAX, 0, etc. correctly?
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
SDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));
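On the TODO above: per the ARM Architecture Reference Manual, SDIV with divide-by-zero trapping disabled (the usual state) handles the edge cases like this:

// ARMv7 SDIV edge cases (assuming trapping disabled):
//   x / 0         -> 0
//   INT_MIN / -1  -> INT_MIN  (truncated, no overflow trap)

Whether that matches what PSP games expect from the MIPS divider is exactly the open question the TODO raises.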

View file

@ -71,33 +71,38 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
// We might be able to flip the condition (EQ/NEQ are easy.)
const bool canFlip = cc == CC_EQ || cc == CC_NEQ;
Operand2 op2;
bool negated;
if (gpr.IsImm(rt) && TryMakeOperand2_AllowNegation(gpr.GetImm(rt), op2, &negated)) {
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
}
else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0 && (cc == CC_EQ || cc == CC_NEQ)) // only these are easily 'flippable'
{
gpr.MapReg(rt);
CMP(gpr.R(rt), Operand2(0, TYPE_IMM));
}
else
{
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
if (!negated)
CMP(gpr.R(rs), op2);
else
CMN(gpr.R(rs), op2);
} else {
if (gpr.IsImm(rs) && TryMakeOperand2_AllowNegation(gpr.GetImm(rs), op2, &negated) && canFlip) {
gpr.MapReg(rt);
if (!negated)
CMP(gpr.R(rt), op2);
else
CMN(gpr.R(rt), op2);
} else {
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
}
}
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!likely) {
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
} else {
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
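Why the CMN substitution above is flag-exact and not just an EQ/NE trick: CMP rs, #imm computes rs - imm, while CMN rs, #imm computes rs + imm, which is bit-for-bit the same operation (including C and V) as CMP rs, #-imm. A sketch with an assumed immediate:

// Say gpr.GetImm(rt) == 0xFFFFFFFF (-1), which has no Operand2 encoding, but
// its negation 1 does, so TryMakeOperand2_AllowNegation sets negated = true:
CMN(gpr.R(rs), op2);  // op2 encodes 1; sets flags exactly as CMP rs, #-1 would
// Every condition code remains usable afterward, not only EQ/NE.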
@ -152,7 +157,7 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool
// Take the branch
if (andLink)
{
MOVI2R(R0, js.compilerPC + 8);
gpr.SetRegImm(R0, js.compilerPC + 8);
STR(R0, CTXREG, MIPS_REG_RA * 4);
}
@ -351,7 +356,7 @@ void Jit::Comp_Jump(MIPSOpcode op)
case 3: //jal
gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
gpr.SetRegImm(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, js.nextExit++);
@ -379,9 +384,10 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
ARMReg destReg = R8;
if (IsSyscall(delaySlotOp)) {
_dbg_assert_msg_(JIT, (op & 0x3f) == 8, "jalr followed by syscall not supported.");
gpr.MapReg(rs);
MOV(R8, gpr.R(rs));
MovToPC(R8); // For syscall to be able to return.
MovToPC(gpr.R(rs)); // For syscall to be able to return.
CompileDelaySlot(DELAYSLOT_FLUSH);
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
@ -393,6 +399,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
// Let's discard them so we don't need to write them back.
// NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash
// with this enabled.
gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++)
gpr.DiscardR((MIPSGPReg)i);
gpr.DiscardR(MIPS_REG_T8);
@ -400,9 +407,9 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
}
FlushAll();
} else {
// Delay slot
// Delay slot - this case is very rare, might be able to free up R8.
gpr.MapReg(rs);
MOV(R8, gpr.R(rs)); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
MOV(R8, gpr.R(rs));
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
}
@ -412,7 +419,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
case 8: //jr
break;
case 9: //jalr
MOVI2R(R0, js.compilerPC + 8);
gpr.SetRegImm(R0, js.compilerPC + 8);
STR(R0, CTXREG, (int)rd * 4);
break;
default:
@ -439,12 +446,12 @@ void Jit::Comp_Syscall(MIPSOpcode op)
void *quickFunc = GetQuickSyscallFunc(op);
if (quickFunc)
{
MOVI2R(R0, (u32)(intptr_t)GetSyscallInfo(op));
gpr.SetRegImm(R0, (u32)(intptr_t)GetSyscallInfo(op));
QuickCallFunction(R1, quickFunc);
}
else
{
MOVI2R(R0, op.encoding);
gpr.SetRegImm(R0, op.encoding);
QuickCallFunction(R1, (void *)&CallSyscall);
}
RestoreDowncount();

View file

@ -108,7 +108,7 @@ void Jit::Comp_FPULS(MIPSOpcode op)
fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
if (gpr.IsImm(rs)) {
u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
MOVI2R(R0, addr + (u32)Memory::base);
gpr.SetRegImm(R0, addr + (u32)Memory::base);
} else {
gpr.MapReg(rs);
if (g_Config.bFastMemory) {
@ -152,7 +152,7 @@ void Jit::Comp_FPULS(MIPSOpcode op)
fpr.MapReg(ft);
if (gpr.IsImm(rs)) {
u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
MOVI2R(R0, addr + (u32)Memory::base);
gpr.SetRegImm(R0, addr + (u32)Memory::base);
} else {
gpr.MapReg(rs);
if (g_Config.bFastMemory) {

View file

@ -78,10 +78,10 @@ namespace MIPSComp
} else {
// Try to avoid using MOVT
if (offset < 0) {
MOVI2R(R0, (u32)(-offset));
gpr.SetRegImm(R0, (u32)(-offset));
SUB(R0, gpr.R(rs), R0);
} else {
MOVI2R(R0, (u32)offset);
gpr.SetRegImm(R0, (u32)offset);
ADD(R0, gpr.R(rs), R0);
}
}
@ -156,7 +156,7 @@ namespace MIPSComp
u32 addr = iaddr & 0x3FFFFFFF;
// Must be OK even if rs == rt since we have the value from imm already.
gpr.MapReg(rt, load ? MAP_NOINIT | MAP_DIRTY : 0);
MOVI2R(R0, addr & ~3);
gpr.SetRegImm(R0, addr & ~3);
u8 shift = (addr & 3) * 8;
@ -284,6 +284,7 @@ namespace MIPSComp
u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
bool doCheck = false;
ARMReg addrReg = R0;
switch (o) {
case 32: //lb
@ -327,9 +328,15 @@ namespace MIPSComp
// We can compute the full address at compile time. Kickass.
u32 addr = iaddr & 0x3FFFFFFF;
// Must be OK even if rs == rt since we have the value from imm already.
gpr.MapReg(rt, load ? MAP_NOINIT | MAP_DIRTY : 0);
MOVI2R(R0, addr);
// Still map rs in, since these accesses often come in a row.
load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
if (addr == iaddr && offset == 0) {
// It was already safe. Let's shove it into a reg and use it directly.
addrReg = gpr.R(rs);
} else {
gpr.SetRegImm(R0, addr);
addrReg = R0;
}
} else {
_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
@ -340,20 +347,21 @@ namespace MIPSComp
} else {
SetR0ToEffectiveAddress(rs, offset);
}
addrReg = R0;
}
switch (o)
{
// Load
case 35: LDR (gpr.R(rt), R11, R0); break;
case 37: LDRH (gpr.R(rt), R11, R0); break;
case 33: LDRSH(gpr.R(rt), R11, R0); break;
case 36: LDRB (gpr.R(rt), R11, R0); break;
case 32: LDRSB(gpr.R(rt), R11, R0); break;
case 35: LDR (gpr.R(rt), R11, addrReg); break;
case 37: LDRH (gpr.R(rt), R11, addrReg); break;
case 33: LDRSH(gpr.R(rt), R11, addrReg); break;
case 36: LDRB (gpr.R(rt), R11, addrReg); break;
case 32: LDRSB(gpr.R(rt), R11, addrReg); break;
// Store
case 43: STR (gpr.R(rt), R11, R0); break;
case 41: STRH (gpr.R(rt), R11, R0); break;
case 40: STRB (gpr.R(rt), R11, R0); break;
case 43: STR (gpr.R(rt), R11, addrReg); break;
case 41: STRH (gpr.R(rt), R11, addrReg); break;
case 40: STRB (gpr.R(rt), R11, addrReg); break;
}
if (doCheck) {
if (load) {
@ -372,8 +380,13 @@ namespace MIPSComp
break;
default:
Comp_Generic(op);
return ;
return;
}
}
void Jit::Comp_Cache(MIPSOpcode op) {
CONDITIONAL_DISABLE;
// TODO: Could use this as a hint; it's also technically required for correct icache handling, etc.
// But right now Int_Cache does nothing, so let's not even call it.
}
}
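A note on the addrReg change above, since it is easy to miss: when rs holds a known immediate, the full address is computed at compile time, and the register backing rs is reused whenever the masking did nothing.

// Sketch of the decision, assuming gpr.IsImm(rs):
//   u32 iaddr = offset + gpr.GetImm(rs);  // full MIPS address
//   u32 addr  = iaddr & 0x3FFFFFFF;       // strip uncached/kernel bits
// If offset == 0 and addr == iaddr, the ARM reg backing rs already holds a
// usable address, so LDR/STR [R11, addrReg] can use it directly; otherwise
// the constant is materialized into R0 via SetRegImm.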

View file

@ -254,7 +254,7 @@ namespace MIPSComp
fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT);
if (gpr.IsImm(rs)) {
u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
MOVI2R(R0, addr + (u32)Memory::base);
gpr.SetRegImm(R0, addr + (u32)Memory::base);
} else {
gpr.MapReg(rs);
if (g_Config.bFastMemory) {
@ -292,7 +292,7 @@ namespace MIPSComp
fpr.MapRegV(vt);
if (gpr.IsImm(rs)) {
u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
MOVI2R(R0, addr + (u32)Memory::base);
gpr.SetRegImm(R0, addr + (u32)Memory::base);
} else {
gpr.MapReg(rs);
if (g_Config.bFastMemory) {
@ -348,7 +348,7 @@ namespace MIPSComp
if (gpr.IsImm(rs)) {
u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
MOVI2R(R0, addr + (u32)Memory::base);
gpr.SetRegImm(R0, addr + (u32)Memory::base);
} else {
gpr.MapReg(rs);
if (g_Config.bFastMemory) {
@ -397,7 +397,7 @@ namespace MIPSComp
if (gpr.IsImm(rs)) {
u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
MOVI2R(R0, addr + (u32)Memory::base);
gpr.SetRegImm(R0, addr + (u32)Memory::base);
} else {
gpr.MapReg(rs);
if (g_Config.bFastMemory) {
@ -1588,7 +1588,7 @@ namespace MIPSComp
GetVectorRegsPrefixD(dregs, sz, _VD);
fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
MOVI2R(R0, (u32)(void *)&cst_constants[conNum]);
gpr.SetRegImm(R0, (u32)(void *)&cst_constants[conNum]);
VLDR(S0, R0, 0);
for (int i = 0; i < n; ++i)
VMOV(fpr.V(dregs[i]), S0);
@ -1652,7 +1652,7 @@ namespace MIPSComp
// We should write a custom pure-asm function instead.
VMOV(R0, fpr.V(sreg));
QuickCallFunction(R1, negSin ? (void *)&SinCosNegSin : (void *)&SinCos);
MOVI2R(R0, (u32)(&sincostemp[0]));
gpr.SetRegImm(R0, (u32)(&sincostemp[0]));
VLDR(S0, R0, 0);
VLDR(S1, R0, 4);

View file

@ -109,19 +109,19 @@ void Jit::FlushAll()
void Jit::FlushPrefixV()
{
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
MOVI2R(R0, js.prefixS);
gpr.SetRegImm(R0, js.prefixS);
STR(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
}
if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
MOVI2R(R0, js.prefixT);
gpr.SetRegImm(R0, js.prefixT);
STR(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
}
if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
MOVI2R(R0, js.prefixD);
gpr.SetRegImm(R0, js.prefixD);
STR(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
}
@ -230,7 +230,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
// large/slow construction of 32-bit immediates?
JumpTarget backJump = GetCodePtr();
MOVI2R(R0, js.blockStart);
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
@ -244,7 +244,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
} else {
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
MOVI2R(R0, js.blockStart);
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
SetCC(CC_AL);
}
@ -283,7 +283,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
if (jo.useForwardJump) {
SetJumpTarget(bail);
MOVI2R(R0, js.blockStart);
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
}
@ -329,9 +329,9 @@ void Jit::Comp_Generic(MIPSOpcode op)
if (func)
{
SaveDowncount();
MOVI2R(R0, js.compilerPC);
gpr.SetRegImm(R0, js.compilerPC);
MovToPC(R0);
MOVI2R(R0, op.encoding);
gpr.SetRegImm(R0, op.encoding);
QuickCallFunction(R1, (void *)func);
RestoreDowncount();
}
@ -373,7 +373,7 @@ void Jit::WriteDownCount(int offset)
} else {
// Should be fine to use R2 here, flushed the regcache anyway.
// If js.downcountAmount can be expressed as an Imm8, we don't need this anyway.
MOVI2R(R2, theDowncount);
gpr.SetRegImm(R2, theDowncount);
SUBS(R7, R7, R2);
}
} else {
@ -386,7 +386,7 @@ void Jit::WriteDownCount(int offset)
} else {
// Should be fine to use R2 here, flushed the regcache anyway.
// If js.downcountAmount can be expressed as an Imm8, we don't need this anyway.
MOVI2R(R2, theDowncount);
gpr.SetRegImm(R2, theDowncount);
SUBS(R1, R1, R2);
STR(R1, CTXREG, offsetof(MIPSState, downcount));
}
@ -412,7 +412,7 @@ void Jit::WriteExit(u32 destination, int exit_num)
B(blocks.GetBlock(block)->checkedEntry);
b->linkStatus[exit_num] = true;
} else {
MOVI2R(R0, destination);
gpr.SetRegImm(R0, destination);
B((const void *)dispatcherPCInR0);
}
}

View file

@ -67,6 +67,7 @@ public:
// Ops
void Comp_ITypeMem(MIPSOpcode op);
void Comp_Cache(MIPSOpcode op);
void Comp_RelBranch(MIPSOpcode op);
void Comp_RelBranchRI(MIPSOpcode op);
@ -161,7 +162,7 @@ private:
void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARMXEmitter::*arith)(ARMReg dst, ARMReg src, Operand2 op2), u32 (*eval)(u32 a, u32 b));
void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARMXEmitter::*arithOp2)(ARMReg dst, ARMReg rm, Operand2 rn), u32 (*eval)(u32 a, u32 b), bool symmetric = false, bool useMOV = false);
void CompShiftImm(MIPSOpcode op, ArmGen::ShiftType shiftType);
void CompShiftImm(MIPSOpcode op, ArmGen::ShiftType shiftType, int sa);
void CompShiftVar(MIPSOpcode op, ArmGen::ShiftType shiftType);
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);

View file

@ -100,25 +100,88 @@ bool ArmRegCache::IsMappedAsPointer(MIPSGPReg mipsReg) {
return mr[mipsReg].loc == ML_ARMREG_AS_PTR;
}
void ArmRegCache::SetRegImm(ARMReg reg, u32 imm) {
// If we can do it with a simple Operand2, let's do that.
Operand2 op2;
bool inverse;
if (TryMakeOperand2_AllowInverse(imm, op2, &inverse)) {
if (!inverse)
emit_->MOV(reg, op2);
else
emit_->MVN(reg, op2);
return;
}
// Okay, so it's a bit more complex. Let's see if we have any useful regs with imm values.
for (int i = 0; i < NUM_MIPSREG; i++) {
const auto &mreg = mr[i];
if (mreg.loc != ML_ARMREG_IMM)
continue;
if (mreg.imm - imm < 256) {
emit_->SUB(reg, mreg.reg, mreg.imm - imm);
return;
}
if (imm - mreg.imm < 256) {
emit_->ADD(reg, mreg.reg, imm - mreg.imm);
return;
}
// This could be common when using an address.
if ((mreg.imm & 0x3FFFFFFF) == imm) {
emit_->BIC(reg, mreg.reg, Operand2(0xC0, 4)); // &= 0x3FFFFFFF
return;
}
// TODO: All sorts of things are possible here, shifted adds, ands/ors, etc.
}
// No luck. Let's go with a regular load.
emit_->MOVI2R(reg, imm);
}
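A sketch of what the nearby-immediate search above buys, with made-up register contents (r4 here is hypothetical):

// Assume some MIPS reg is cached in r4 as ML_ARMREG_IMM holding 0x08804100:
gpr.SetRegImm(R0, 0x08804104);  // emits ADD R0, r4, #4 -- one instruction
gpr.SetRegImm(R0, 0x088040FF);  // emits SUB R0, r4, #1
// If r4 instead held 0x48804100, requesting 0x08804100 would hit the BIC case,
// since (0x48804100 & 0x3FFFFFFF) == 0x08804100.
gpr.SetRegImm(R0, 0x12345678);  // no nearby imm: falls back to MOVI2R
                                // (MOVW/MOVT on ARMv7, else a literal load)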
void ArmRegCache::MapRegTo(ARMReg reg, MIPSGPReg mipsReg, int mapFlags) {
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if (!(mapFlags & MAP_NOINIT)) {
if (mipsReg == 0) {
if (mipsReg == MIPS_REG_ZERO) {
// If we get a request to load the zero register, at least we won't spend
// time on a memory access...
// TODO: EOR?
emit_->MOV(reg, 0);
// This way, if we SetImm() it, we'll keep it.
mr[mipsReg].loc = ML_ARMREG_IMM;
mr[mipsReg].imm = 0;
} else {
if (mr[mipsReg].loc == ML_MEM) {
switch (mr[mipsReg].loc) {
case ML_MEM:
emit_->LDR(reg, CTXREG, GetMipsRegOffset(mipsReg));
} else if (mr[mipsReg].loc == ML_IMM) {
emit_->MOVI2R(reg, mr[mipsReg].imm);
mr[mipsReg].loc = ML_ARMREG;
break;
case ML_IMM:
SetRegImm(reg, mr[mipsReg].imm);
ar[reg].isDirty = true; // IMM is always dirty.
// If we are mapping dirty, it means we're gonna overwrite.
// So the imm value is no longer valid.
if (mapFlags & MAP_DIRTY)
mr[mipsReg].loc = ML_ARMREG;
else
mr[mipsReg].loc = ML_ARMREG_IMM;
break;
default:
mr[mipsReg].loc = ML_ARMREG;
break;
}
}
} else {
if (mipsReg == MIPS_REG_ZERO) {
// This way, if we SetImm() it, we'll keep it.
mr[mipsReg].loc = ML_ARMREG_IMM;
mr[mipsReg].imm = 0;
} else {
mr[mipsReg].loc = ML_ARMREG;
}
}
ar[reg].mipsReg = mipsReg;
mr[mipsReg].loc = ML_ARMREG;
mr[mipsReg].reg = reg;
}
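Summarizing the new location handling in MapRegTo above, since the switch covers several transitions:

// ML_MEM             -> ML_ARMREG      (LDR from the MIPSState context)
// ML_IMM             -> ML_ARMREG_IMM  (SetRegImm; reg dirty, imm still known)
// ML_IMM + MAP_DIRTY -> ML_ARMREG      (caller will overwrite, imm forgotten)
// MAP_NOINIT skips loading entirely; MIPS_REG_ZERO is pinned as imm 0.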
@ -128,12 +191,14 @@ ARMReg ArmRegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
if (mr[mipsReg].loc == ML_ARMREG) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
ARMReg armReg = mr[mipsReg].reg;
if (ar[armReg].mipsReg != mipsReg) {
ERROR_LOG(JIT, "Register mapping out of sync! %i", mipsReg);
}
if (mapFlags & MAP_DIRTY) {
// Mapping dirty means the old imm value is invalid.
mr[mipsReg].loc = ML_ARMREG;
ar[armReg].isDirty = true;
}
return (ARMReg)mr[mipsReg].reg;
@ -142,7 +207,9 @@ ARMReg ArmRegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
// add or subtract stuff to it. Later we could allow such things but for now
// let's just convert back to a register value by reloading from the backing storage.
ARMReg armReg = mr[mipsReg].reg;
emit_->LDR(armReg, CTXREG, GetMipsRegOffset(mipsReg));
if (!(mapFlags & MAP_NOINIT)) {
emit_->LDR(armReg, CTXREG, GetMipsRegOffset(mipsReg));
}
mr[mipsReg].loc = ML_ARMREG;
if (mapFlags & MAP_DIRTY) {
ar[armReg].isDirty = true;
@ -254,7 +321,7 @@ void ArmRegCache::FlushArmReg(ARMReg r) {
return;
}
if (ar[r].mipsReg != MIPS_REG_INVALID) {
if (ar[r].isDirty && mr[ar[r].mipsReg].loc == ML_ARMREG)
if (ar[r].isDirty && (mr[ar[r].mipsReg].loc == ML_ARMREG || mr[ar[r].mipsReg].loc == ML_ARMREG_IMM))
emit_->STR(r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
// Plain ML_IMM values never occupy an ARM reg. Note that for ML_ARMREG_IMM this
// drops the known imm value; it could arguably be demoted to ML_IMM instead.
mr[ar[r].mipsReg].loc = ML_MEM;
@ -268,7 +335,8 @@ void ArmRegCache::FlushArmReg(ARMReg r) {
}
void ArmRegCache::DiscardR(MIPSGPReg mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_AS_PTR) {
const RegMIPSLoc prevLoc = mr[mipsReg].loc;
if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_AS_PTR || prevLoc == ML_ARMREG_IMM) {
ARMReg armReg = mr[mipsReg].reg;
ar[armReg].isDirty = false;
ar[armReg].mipsReg = MIPS_REG_INVALID;
@ -283,14 +351,15 @@ void ArmRegCache::FlushR(MIPSGPReg r) {
case ML_IMM:
// IMM is always "dirty".
if (r != MIPS_REG_ZERO) {
emit_->MOVI2R(R0, mr[r].imm);
SetRegImm(R0, mr[r].imm);
emit_->STR(R0, CTXREG, GetMipsRegOffset(r));
}
break;
case ML_ARMREG:
case ML_ARMREG_IMM:
if (mr[r].reg == INVALID_REG) {
ERROR_LOG(JIT, "FlushMipsReg: MipsReg had bad ArmReg");
ERROR_LOG(JIT, "FlushR: MipsReg %d had bad ArmReg", r);
}
if (ar[mr[r].reg].isDirty) {
if (r != MIPS_REG_ZERO) {
@ -314,7 +383,7 @@ void ArmRegCache::FlushR(MIPSGPReg r) {
break;
default:
//BAD
ERROR_LOG(JIT, "FlushR: MipsReg %d with invalid location %d", r, mr[r].loc);
break;
}
mr[r].loc = ML_MEM;
@ -327,7 +396,7 @@ int ArmRegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm)
// Only start a sequence on a dirty armreg.
// TODO: Could also start with an imm?
const auto &startMipsInfo = mr[startMipsReg];
if (startMipsInfo.loc != ML_ARMREG || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) {
if ((startMipsInfo.loc != ML_ARMREG && startMipsInfo.loc != ML_ARMREG_IMM) || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) {
return 0;
}
@ -339,7 +408,7 @@ int ArmRegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm)
int lastArmReg = startMipsInfo.reg;
// Can't use HI/LO, only regs in the main r[] array.
for (int r = (int)startMipsReg + 1; r < 32; ++r) {
if (mr[r].loc == ML_ARMREG && mr[r].reg != INVALID_REG) {
if ((mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_IMM) && mr[r].reg != INVALID_REG) {
if ((int)mr[r].reg > lastArmReg && ar[mr[r].reg].isDirty) {
++c;
lastArmReg = mr[r].reg;
@ -425,11 +494,15 @@ void ArmRegCache::FlushAll() {
}
void ArmRegCache::SetImm(MIPSGPReg r, u32 immVal) {
if (r == MIPS_REG_ZERO)
if (r == MIPS_REG_ZERO && immVal != 0)
ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
if (mr[r].loc == ML_ARMREG_IMM && mr[r].imm == immVal) {
// Already have that value, let's keep it in the reg.
return;
}
// Zap existing value if cached in a reg
if (mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_AS_PTR) {
if (mr[r].reg != INVALID_REG) {
ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
ar[mr[r].reg].isDirty = false;
}
@ -440,12 +513,12 @@ void ArmRegCache::SetImm(MIPSGPReg r, u32 immVal) {
bool ArmRegCache::IsImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return true;
return mr[r].loc == ML_IMM;
return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM;
}
u32 ArmRegCache::GetImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return 0;
if (mr[r].loc != ML_IMM) {
if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) {
ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
}
return mr[r].imm;
@ -483,7 +556,7 @@ void ArmRegCache::ReleaseSpillLock(MIPSGPReg reg) {
}
ARMReg ArmRegCache::R(MIPSGPReg mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
return (ARMReg)mr[mipsReg].reg;
} else {
ERROR_LOG(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_);

View file

@ -44,7 +44,10 @@ struct RegARM {
enum RegMIPSLoc {
ML_IMM,
ML_ARMREG,
// In an arm reg, but as a pre-adjusted pointer, not the actual reg.
ML_ARMREG_AS_PTR,
// In an arm reg, but also has a known immediate value.
ML_ARMREG_IMM,
ML_MEM,
};
@ -87,6 +90,8 @@ public:
void SetImm(MIPSGPReg reg, u32 immVal);
bool IsImm(MIPSGPReg reg) const;
u32 GetImm(MIPSGPReg reg) const;
// Optimally set a register to an imm value (possibly using another register.)
void SetRegImm(ARMReg reg, u32 imm);
// Returns an ARM register containing the requested MIPS register.
ARMReg MapReg(MIPSGPReg reg, int mapFlags = 0);

View file

@ -147,7 +147,7 @@ const MIPSInstruction tableImmediate[64] = // xxxxxx ..... ..... ...............
INVALID,
INVALID,
INSTR("swr", &Jit::Comp_ITypeMem, Dis_ITypeMem, Int_ITypeMem, IN_IMM16|IN_RS_ADDR|IN_RT|OUT_MEM|MEMTYPE_WORD),
INSTR("cache", &Jit::Comp_Generic, Dis_Generic, Int_Cache, IN_MEM|IN_IMM16|IN_RS_ADDR|IN_OTHER|OUT_OTHER),
INSTR("cache", &Jit::Comp_Cache, Dis_Generic, Int_Cache, IN_MEM|IN_IMM16|IN_RS_ADDR|IN_OTHER|OUT_OTHER),
//48
INSTR("ll", &Jit::Comp_Generic, Dis_Generic, Int_StoreSync, IN_MEM|IN_IMM16|IN_RS_ADDR|OUT_RT|OUT_OTHER|MEMTYPE_WORD),
INSTR("lwc1", &Jit::Comp_FPULS, Dis_FPULS, Int_FPULS, IN_MEM|IN_IMM16|IN_RS_ADDR|OUT_OTHER|MEMTYPE_FLOAT),

View file

@ -140,3 +140,9 @@ void Jit::Comp_ITypeMem(MIPSOpcode op) {
}
}
}
void Jit::Comp_Cache(MIPSOpcode op) {
CONDITIONAL_DISABLE;
// TODO: Could use this as a hint; it's also technically required for correct icache handling, etc.
// But right now Int_Cache does nothing, so let's not even call it.
}

View file

@ -176,6 +176,7 @@ namespace MIPSComp
// Ops
void Comp_ITypeMem(MIPSOpcode op);
void Comp_Cache(MIPSOpcode op);
void Comp_RelBranch(MIPSOpcode op);
void Comp_RelBranchRI(MIPSOpcode op);

View file

@ -782,19 +782,33 @@ namespace MIPSComp
if (rd == MIPS_REG_ZERO)
return;
DISABLE;
switch (op & 0x3ff)
{
case 0xA0: //wsbh
if (gpr.IsImmediate(rt)) {
u32 rtImm = gpr.GetImmediate32(rt);
gpr.SetImmediate32(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));
break;
}
gpr.Lock(rd, rt);
gpr.MapReg(rd, rd == rt, true);
// Stub
if (rd != rt)
MOV(32, gpr.R(rd), gpr.R(rt));
// Swap both 16-bit halfwords by rotating afterward.
BSWAP(32, gpr.RX(rd));
ROR(32, gpr.R(rd), Imm8(16));
gpr.UnlockAll();
break;
case 0xE0: //wsbw
if (gpr.IsImmediate(rt)) {
gpr.SetImmediate32(rd, swap32(gpr.GetImmediate32(rt)));
break;
}
gpr.Lock(rd, rt);
gpr.MapReg(rd, rd == rt, true);
// Stub
if (rd != rt)
MOV(32, gpr.R(rd), gpr.R(rt));
BSWAP(32, gpr.RX(rd));
gpr.UnlockAll();
break;
default:
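Why the BSWAP + ROR pair above implements wsbh, with an assumed input value:

// eax = 0xAABBCCDD
// BSWAP eax     -> 0xDDCCBBAA  (full byte reverse)
// ROR  eax, 16  -> 0xBBAADDCC  (swap the two halfwords back)
// Net effect: bytes swapped within each halfword, which is exactly wsbh.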

View file

@ -187,7 +187,7 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
else
{
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), rt == MIPS_REG_ZERO ? Imm32(0) : gpr.R(rt));
CMP(32, gpr.R(rs), gpr.R(rt));
}
Gen::FixupBranch ptr;
@ -583,6 +583,8 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
if (IsSyscall(delaySlotOp))
{
_dbg_assert_msg_(JIT, (op & 0x3f) == 8, "jalr followed by syscall not supported.");
// If this is a syscall, write the pc (for thread switching and other good reasons.)
gpr.MapReg(rs, true, false);
MOV(32, M(&currentMIPS->pc), gpr.R(rs));

View file

@ -370,4 +370,8 @@ namespace MIPSComp
}
}
void Jit::Comp_Cache(MIPSOpcode op) {
DISABLE;
}
}

View file

@ -84,6 +84,7 @@ public:
// Ops
void Comp_ITypeMem(MIPSOpcode op);
void Comp_Cache(MIPSOpcode op);
void Comp_RelBranch(MIPSOpcode op);
void Comp_RelBranchRI(MIPSOpcode op);