More VFPU

This commit is contained in:
Henrik Rydgard 2016-05-09 23:47:56 +02:00
parent a5d5c5ce2b
commit 558bb197c7
11 changed files with 105 additions and 40 deletions

View file

@ -19,11 +19,9 @@
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRFrontend.h"
#include "Common/CPUDetect.h"
using namespace MIPSAnalyst;
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)

View file

@ -28,7 +28,7 @@
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRFrontend.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Common/Arm64Emitter.h"

View file

@ -21,7 +21,7 @@
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRFrontend.h"
#include "Core/MIPS/IR/IRRegCache.h"
#include "Common/CPUDetect.h"

View file

@ -42,7 +42,7 @@
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRFrontend.h"
#include "Core/MIPS/IR/IRRegCache.h"
#define _RS MIPS_GET_RS(op)

View file

@ -27,7 +27,7 @@
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRFrontend.h"
#include "Core/MIPS/IR/IRRegCache.h"
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
@ -50,6 +50,15 @@
#define _IMM26 (op & 0x03FFFFFF)
namespace MIPSComp {
// Rewrites the first `count` entries of `regs` in place, replacing each entry
// with the value the voffset[] lookup table holds for it. The Comp_SVQ call
// site describes this step as translating the registers to memory order.
// A `count` of zero or less leaves `regs` untouched.
static void ApplyVoffset(u8 regs[4], int count) {
	u8 *cur = regs;
	for (int remaining = count; remaining > 0; --remaining, ++cur) {
		*cur = voffset[*cur];
	}
}
// Returns true when the four entries of `regs` form an ascending run with a
// step of exactly one, i.e. regs[i + 1] == regs[i] + 1 for i = 0, 1, 2.
// Pairs are checked in order and the first mismatch ends the check.
static bool IsConsecutive4(const u8 regs[4]) {
	for (int i = 0; i < 3; i++) {
		if (regs[i + 1] != regs[i] + 1)
			return false;
	}
	return true;
}
void IRFrontend::Comp_VPFX(MIPSOpcode op) {
CONDITIONAL_DISABLE;
@ -177,7 +186,21 @@ namespace MIPSComp {
}
// Emits IR for the two single-float VFPU memory instructions handled here:
// lv.s (opcode field 50) is written as LoadFloatV and sv.s (opcode field 58)
// as StoreFloatV. Any other opcode field falls through to DISABLE.
// NOTE(review): this text comes from a diff view that interleaves removed and
// added lines without markers. The DISABLE directly below looks like the
// pre-change body and presumably is not part of the final function - confirm
// against the repository before relying on it.
void IRFrontend::Comp_SV(MIPSOpcode op) {
DISABLE;
// Low 16 bits of the instruction with the bottom two bits masked off,
// reinterpreted as a signed 16-bit value and widened to s32.
s32 offset = (signed short)(op & 0xFFFC);
// Register index: instruction bits 16-20 supply the low five bits, and the
// two lowest instruction bits (the ones masked out of `offset`) supply
// bits 5 and 6.
int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
MIPSGPReg rs = _RS;
// Dispatch on the instruction bits from bit 26 upward.
switch (op >> 26) {
case 50: //lv.s
// vt is remapped through voffset[] before being used as the IR operand;
// the byte offset goes into the IR constant pool.
ir.Write(IROp::LoadFloatV, voffset[vt], rs, ir.AddConstant(offset));
break;
case 58: //sv.s
// Same operand layout as the load above, with the store opcode.
ir.Write(IROp::StoreFloatV, voffset[vt], rs, ir.AddConstant(offset));
break;
default:
// Not one of the two encodings handled by this function.
DISABLE;
}
}
void IRFrontend::Comp_SVQ(MIPSOpcode op) {
@ -187,27 +210,32 @@ namespace MIPSComp {
u8 vregs[4];
GetVectorRegs(vregs, V_Quad, vt);
ApplyVoffset(vregs, 4); // Translate to memory order
switch (op >> 26) {
case 54: //lv.q
{
// TODO: Add vector load/store instruction to the IR
ir.Write(IROp::LoadFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm));
ir.Write(IROp::LoadFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4));
ir.Write(IROp::LoadFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8));
ir.Write(IROp::LoadFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12));
}
break;
if (IsConsecutive4(vregs)) {
ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm));
} else {
// Let's not even bother with "vertical" loads for now.
ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm));
ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4));
ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8));
ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12));
}
break;
case 62: //sv.q
{
// CC might be set by slow path below, so load regs first.
ir.Write(IROp::StoreFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm));
ir.Write(IROp::StoreFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4));
ir.Write(IROp::StoreFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8));
ir.Write(IROp::StoreFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12));
}
break;
if (IsConsecutive4(vregs)) {
ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm));
} else {
// Let's not even bother with "vertical" stores for now.
ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm));
ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4));
ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8));
ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12));
}
break;
default:
DISABLE;

View file

@ -236,8 +236,8 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified))
logBlocks = 1;
code = &simplified;
if (ir.GetInstructions().size() >= 24)
logBlocks = 1;
//if (ir.GetInstructions().size() >= 24)
// logBlocks = 1;
}
instructions = code->GetInstructions();

View file

@ -60,11 +60,13 @@ static const IRMeta irMeta[] = {
{ IROp::Load32, "Load32", "GGC" },
{ IROp::LoadFloat, "LoadFloat", "FGC" },
{ IROp::LoadFloatV, "LoadFloatV", "VGC" },
{ IROp::LoadVec4, "LoadVec4", "VGC" },
{ IROp::Store8, "Store8", "GGC" },
{ IROp::Store16, "Store16", "GGC" },
{ IROp::Store32, "Store32", "GGC" },
{ IROp::StoreFloat, "StoreFloat", "FGC" },
{ IROp::StoreFloatV, "StoreFloatV", "VGC" },
{ IROp::StoreVec4, "StoreVec4", "VGC" },
{ IROp::FAdd, "FAdd", "FFF" },
{ IROp::FSub, "FSub", "FFF" },
{ IROp::FMul, "FMul", "FFF" },

View file

@ -90,12 +90,14 @@ enum class IROp : u8 {
Load32,
LoadFloat,
LoadFloatV,
LoadVec4,
Store8,
Store16,
Store32,
StoreFloat,
StoreFloatV,
StoreVec4,
Ext8to32,
Ext16to32,
@ -212,13 +214,16 @@ enum {
IRTEMP_LHS, // Reserved for use in branches
IRTEMP_RHS, // Reserved for use in branches
// 16 float temps for vector S and T prefixes and things like that.
// IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0]
// Hacky way to get to other state
IRREG_VPFU_CTRL_BASE = 208,
IRREG_VPFU_CC = 211,
IRREG_VFPU_CTRL_BASE = 208,
IRREG_VFPU_CC = 211,
IRREG_LO = 226, // offset of lo in MIPSState / 4
IRREG_HI = 227,
IRREG_FCR31 = 228,
IRREG_FPCOND = 229
IRREG_FPCOND = 229,
};
struct IRMeta {

View file

@ -1,3 +1,7 @@
#ifdef _M_SSE
#include <smmintrin.h>
#endif
#include "Core/MemMap.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
@ -107,6 +111,29 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
Memory::WriteUnchecked_Float(mips->v[inst->src3], mips->r[inst->src1] + constPool[inst->src2]);
break;
case IROp::LoadVec4:
{
u32 base = mips->r[inst->src1] + constPool[inst->src2];
#if defined(_M_SSE)
_mm_store_ps(&mips->v[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base)));
#else
for (int i = 0; i < 4; i++)
mips->v[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i);
#endif
break;
}
case IROp::StoreVec4:
{
u32 base = mips->r[inst->src1] + constPool[inst->src2];
#if defined(_M_SSE)
_mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->v[inst->dest]));
#else
for (int i = 0; i < 4; i++)
Memory::WriteUnchecked_Float(mips->v[inst->dest + i], base + 4 * i);
#endif
break;
}
case IROp::ShlImm:
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
break;

View file

@ -291,6 +291,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
break;
case IROp::StoreFloat:
case IROp::StoreFloatV:
case IROp::StoreVec4:
if (gpr.IsImm(inst.src1)) {
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
} else {
@ -314,6 +315,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
break;
case IROp::LoadFloat:
case IROp::LoadFloatV:
case IROp::LoadVec4:
if (gpr.IsImm(inst.src1)) {
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
} else {
@ -388,7 +390,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
goto doDefault;
case IROp::VfpuCtrlToReg:
gpr.MapDirtyIn(inst.dest, IRREG_VPFU_CTRL_BASE + inst.src1);
gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1);
goto doDefault;
case IROp::Syscall:

View file

@ -86,6 +86,7 @@ enum MIPSGPReg {
MIPS_REG_RA=31,
// Not real regs, just for convenience/jit mapping.
// NOTE: These are not the same as the offsets the IR has to use!
MIPS_REG_HI = 32,
MIPS_REG_LO = 33,
MIPS_REG_FPCOND = 34,
@ -155,7 +156,7 @@ public:
void DoState(PointerWrap &p);
// MUST start with r and be followed by f!
// MUST start with r and be followed by f, v, and t!
u32 r[32];
union {
float f[32];
@ -166,23 +167,25 @@ public:
float v[128];
u32 vi[128];
};
// Used for temporary variables by IR Interpreter.
// Can be indexed through r[] using indices 192+.
u32 t[16];
// Temps don't get flushed so we don't reserve space for them.
// Register-allocated JIT Temps don't get flushed so we don't reserve space for them.
// However, the IR interpreter needs some temps that can stick around between ops.
// Can be indexed through r[] using indices 192+.
u32 t[16]; //192
// float vt[16]; //208 TODO: VFPU temp
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
u32 vfpuCtrl[16];
u32 vfpuCtrl[16]; // 208
// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
u32 padLoHi;
u32 padLoHi; // 224
union {
struct {
u32 pc;
u32 pc; //225
u32 lo; // offset 192 + 16 + 16 + 1 + 1
u32 hi;
u32 lo; //226
u32 hi; //227
u32 fcr31; //fpu control register
u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23)