From ad3d5394515fac67c681bdf62e78f4c36cab9878 Mon Sep 17 00:00:00 2001
From: Henrik Rydgard
Date: Wed, 25 Mar 2015 22:20:06 +0100
Subject: [PATCH] ARM64: Attempt at lwl/lwr/swl/swr. The first two don't work

---
 Core/MIPS/ARM64/Arm64CompLoadStore.cpp | 120 +++++++++++++++----------
 unittest/TestArm64Emitter.cpp          |   7 +-
 2 files changed, 81 insertions(+), 46 deletions(-)

diff --git a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
index 89596da010..2ca0187e9b 100644
--- a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
+++ b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
@@ -90,7 +90,8 @@ namespace MIPSComp
 		MIPSGPReg rs = _RS;
 		int o = op >> 26;
 
-		if (!js.inDelaySlot) {
+		// TODO: For some reason I can't get this to work on ARM64.
+		if (!js.inDelaySlot && false) {
 			// Optimisation: Combine to single unaligned load/store
 			bool isLeft = (o == 34 || o == 42);
 			MIPSOpcode nextOp = Memory::Read_Instruction(js.compilerPC + 4);
@@ -149,12 +150,31 @@ namespace MIPSComp
 			return;
 		}
 
-		/*
-		_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
-		load ? gpr.MapDirtyIn(rt, rs, false) : gpr.MapInIn(rt, rs);
+		switch (o) {
+		case 34: // lwl
+			DISABLE;
+			break;
 
-		if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
-			SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCHREG2, true);
+		case 38: // lwr
+			DISABLE;
+			break;
+
+		case 42: // swl
+			break;
+
+		case 46: // swr
+			break;
+		}
+
+		_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
+		if (load) {
+			gpr.MapDirtyIn(rt, rs, false);
+		} else {
+			gpr.MapInIn(rt, rs);
+		}
+
+		if (false && !g_Config.bFastMemory && rs != MIPS_REG_SP) {
+			SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2, true);
 			doCheck = true;
 		} else {
 			SetScratch1ToEffectiveAddress(rs, offset);
@@ -162,71 +182,82 @@ namespace MIPSComp
 		if (doCheck) {
 			skip = B();
 		}
-		SetCC(CC_AL);
 
 		// Need temp regs. TODO: Get from the regcache?
-		static const ARM64Reg LR_SCRATCHREG3 = R9;
-		static const ARM64Reg LR_SCRATCHREG4 = R10;
-		if (load) {
-			PUSH(1, LR_SCRATCHREG3);
+		static const ARM64Reg LR_SCRATCH3 = W9;
+		static const ARM64Reg LR_SCRATCH4 = W10;
+		if (false && load) {
+			PUSH(EncodeRegTo64(LR_SCRATCH3));
 		} else {
-			PUSH(2, LR_SCRATCHREG3, LR_SCRATCHREG4);
+			PUSH2(EncodeRegTo64(LR_SCRATCH3), EncodeRegTo64(LR_SCRATCH4));
 		}
 
 		// Here's our shift amount.
-		AND(SCRATCHREG2, R0, 3);
-		LSL(SCRATCHREG2, SCRATCHREG2, 3);
+		ANDI2R(SCRATCH2, SCRATCH1, 3);
+		LSL(SCRATCH2, SCRATCH2, 3);
 
 		// Now align the address for the actual read.
-		BIC(R0, R0, 3);
+		ANDI2R(SCRATCH1, SCRATCH1, ~3U);
 
 		switch (o) {
 		case 34: // lwl
-			MOVI2R(LR_SCRATCHREG3, 0x00ffffff);
-			LDR(R0, MEMBASEREG, R0);
-			AND(gpr.R(rt), gpr.R(rt), Operand2(LR_SCRATCHREG3, ST_LSR, SCRATCHREG2));
-			RSB(SCRATCHREG2, SCRATCHREG2, 24);
-			ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSL, SCRATCHREG2));
+			MOVI2R(LR_SCRATCH3, 0x00ffffff);
+			LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
+			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
+			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
+			NEG(SCRATCH2, SCRATCH2);
+			ADDI2R(SCRATCH2, SCRATCH2, 24);
+			LSLV(SCRATCH1, SCRATCH1, SCRATCH2);
+			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
 			break;
 
 		case 38: // lwr
-			MOVI2R(LR_SCRATCHREG3, 0xffffff00);
-			LDR(R0, MEMBASEREG, R0);
-			LSR(R0, R0, SCRATCHREG2);
-			RSB(SCRATCHREG2, SCRATCHREG2, 24);
-			AND(gpr.R(rt), gpr.R(rt), Operand2(LR_SCRATCHREG3, ST_LSL, SCRATCHREG2));
-			ORR(gpr.R(rt), gpr.R(rt), R0);
+			MOVI2R(LR_SCRATCH3, 0xffffff00);
+			LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
+			LSRV(SCRATCH1, SCRATCH1, SCRATCH2);
+			NEG(SCRATCH2, SCRATCH2);
+			ADDI2R(SCRATCH2, SCRATCH2, 24);
+			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
+			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
+			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
 			break;
 
 		case 42: // swl
-			MOVI2R(LR_SCRATCHREG3, 0xffffff00);
-			LDR(LR_SCRATCHREG4, MEMBASEREG, R0);
-			AND(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(LR_SCRATCHREG3, ST_LSL, SCRATCHREG2));
-			RSB(SCRATCHREG2, SCRATCHREG2, 24);
-			ORR(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(gpr.R(rt), ST_LSR, SCRATCHREG2));
-			STR(LR_SCRATCHREG4, MEMBASEREG, R0);
+			MOVI2R(LR_SCRATCH3, 0xffffff00);
+			LDR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
+			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
+			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
+			NEG(SCRATCH2, SCRATCH2);
+			ADDI2R(SCRATCH2, SCRATCH2, 24);
+			LSRV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
+			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
+			STR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
 			break;
 
 		case 46: // swr
-			MOVI2R(LR_SCRATCHREG3, 0x00ffffff);
-			LDR(LR_SCRATCHREG4, MEMBASEREG, R0);
-			RSB(SCRATCHREG2, SCRATCHREG2, 24);
-			AND(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(LR_SCRATCHREG3, ST_LSR, SCRATCHREG2));
-			RSB(SCRATCHREG2, SCRATCHREG2, 24);
-			ORR(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(gpr.R(rt), ST_LSL, SCRATCHREG2));
-			STR(LR_SCRATCHREG4, MEMBASEREG, R0);
+			MOVI2R(LR_SCRATCH3, 0x00ffffff);
+			LDR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
+			NEG(SCRATCH2, SCRATCH2);
+			ADDI2R(SCRATCH2, SCRATCH2, 24);
+			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
+			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
+			NEG(SCRATCH2, SCRATCH2);
+			ADDI2R(SCRATCH2, SCRATCH2, 24);
+			LSLV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
+			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
+			STR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
 			break;
 		}
 
-		if (load) {
-			POP(1, LR_SCRATCHREG3);
+		if (false && load) {
+			POP(EncodeRegTo64(LR_SCRATCH3));
 		} else {
-			POP(2, LR_SCRATCHREG3, LR_SCRATCHREG4);
+			POP2(EncodeRegTo64(LR_SCRATCH3), EncodeRegTo64(LR_SCRATCH4));
 		}
 
 		if (doCheck) {
 			SetJumpTarget(skip);
-		}*/
+		}
 	}
 
 	void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
@@ -339,8 +370,7 @@ namespace MIPSComp
 			load = true;
 		case 42: //swl
 		case 46: //swr
-			DISABLE;
-			// Comp_ITypeMemLR(op, load);
+			Comp_ITypeMemLR(op, load);
 			break;
 		default:
 			Comp_Generic(op);
diff --git a/unittest/TestArm64Emitter.cpp b/unittest/TestArm64Emitter.cpp
index 27d64c142d..54a19da436 100644
--- a/unittest/TestArm64Emitter.cpp
+++ b/unittest/TestArm64Emitter.cpp
@@ -39,6 +39,9 @@ bool TestArm64Emitter() {
 	ARM64XEmitter emitter((u8 *)code);
 	ARM64FloatEmitter fp(&emitter);
+	emitter.NEG(X1, X2);
+	RET(CheckLast(emitter, "cb0203e1 neg x1, x2"));
+	// A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
 	emitter.ADD(X1, X2, X3, ArithOption(X1, ST_LSL, 3));
 	RET(CheckLast(emitter, "8b030c41 add x1, x2, x3, lsl #3"));
 	// A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
 	//emitter.EXTR(W1, W3, 0, 7);
@@ -95,6 +98,8 @@ bool TestArm64Emitter() {
 	RET(CheckLast(emitter, "1e38006c fcvtzs w12, s3"));
 	emitter.LSLV(W1, W2, W3);
 	RET(CheckLast(emitter, "1ac32041 lslv w1, w2, w3"));
+	emitter.LSRV(W1, W2, W3);
+	RET(CheckLast(emitter, "1ac32441 lsrv w1, w2, w3"));
 	emitter.UDIV(W1, W2, W3);
 	RET(CheckLast(emitter, "1ac30841 udiv w1, w2, w3"));
 	emitter.RBIT(W3, W2);
@@ -175,4 +180,4 @@ bool TestArm64Emitter() {
 	// fp.FMUL(Q0, Q1, Q2);
 	// RET(CheckLast(emitter, "4b3e4041 sub w1, w2, w30"));
 	return true;
-}
\ No newline at end of file
+}
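
Note on the arithmetic being emitted above: the mask/shift sequences follow the standard little-endian lwl/lwr/swl/swr formulas, i.e. shift = (addr & 3) * 8, the masks 0x00ffffff and 0xffffff00, and the complementary amount 24 - shift (built here with the NEG + ADDI2R pairs, standing in for ARM32's RSB). The sketch below is a minimal stand-alone C++ reference of just that arithmetic, useful while reading the disabled lwl/lwr cases; the helper names (lwl_ref and friends) are made up for illustration and none of this is PPSSPP code.

#include <cstdint>
#include <cstdio>

// shift = (addr & 3) * 8 selects how many bytes of the aligned word overlap rt.
static uint32_t lwl_ref(uint32_t rt, uint32_t mem, uint32_t addr) {
	uint32_t shift = (addr & 3) * 8;
	return (rt & (0x00ffffffu >> shift)) | (mem << (24 - shift));
}

static uint32_t lwr_ref(uint32_t rt, uint32_t mem, uint32_t addr) {
	uint32_t shift = (addr & 3) * 8;
	return (rt & (0xffffff00u << (24 - shift))) | (mem >> shift);
}

static uint32_t swl_ref(uint32_t rt, uint32_t mem, uint32_t addr) {
	uint32_t shift = (addr & 3) * 8;
	return (mem & (0xffffff00u << shift)) | (rt >> (24 - shift));
}

static uint32_t swr_ref(uint32_t rt, uint32_t mem, uint32_t addr) {
	uint32_t shift = (addr & 3) * 8;
	return (mem & (0x00ffffffu >> (24 - shift))) | (rt << shift);
}

int main() {
	// Unaligned 32-bit load from byte address 1 of the word pair 0x03020100, 0x07060504:
	// lwr at addr 1 supplies the low three bytes, lwl at addr 4 supplies the top byte.
	uint32_t rt = 0;
	rt = lwr_ref(rt, 0x03020100u, 1);  // rt = 0x00030201
	rt = lwl_ref(rt, 0x07060504u, 4);  // rt = 0x04030201
	printf("%08x\n", rt);              // prints 04030201
	return 0;
}

In the JIT code above, SCRATCH1 holds the (then aligned) guest address and, for the loads, the word read from memory; SCRATCH2 plays the role of shift; LR_SCRATCH3 holds the mask; and LR_SCRATCH4 holds the word being merged and written back for the stores.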