mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
More VFPU
This commit is contained in:
parent
a5d5c5ce2b
commit
558bb197c7
11 changed files with 105 additions and 40 deletions
|
@ -19,11 +19,9 @@
|
|||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/IR/IRJit.h"
|
||||
#include "Core/MIPS/IR/IRFrontend.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
using namespace MIPSAnalyst;
|
||||
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
#define _RT MIPS_GET_RT(op)
|
||||
#define _RD MIPS_GET_RD(op)
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "Core/MIPS/IR/IRJit.h"
|
||||
#include "Core/MIPS/IR/IRFrontend.h"
|
||||
#include "Core/MIPS/JitCommon/JitBlockCache.h"
|
||||
|
||||
#include "Common/Arm64Emitter.h"
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "Core/MIPS/IR/IRJit.h"
|
||||
#include "Core/MIPS/IR/IRFrontend.h"
|
||||
#include "Core/MIPS/IR/IRRegCache.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/IR/IRJit.h"
|
||||
#include "Core/MIPS/IR/IRFrontend.h"
|
||||
#include "Core/MIPS/IR/IRRegCache.h"
|
||||
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
|
||||
#include "Core/MIPS/IR/IRJit.h"
|
||||
#include "Core/MIPS/IR/IRFrontend.h"
|
||||
#include "Core/MIPS/IR/IRRegCache.h"
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
|
@ -50,6 +50,15 @@
|
|||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
namespace MIPSComp {
|
||||
// Rewrites the first `count` entries of `regs` in place, replacing each
// register number with the value stored for it in the voffset lookup table.
// Entries at index `count` and beyond are left untouched.
static void ApplyVoffset(u8 regs[4], int count) {
	int idx = 0;
	while (idx < count) {
		const u8 original = regs[idx];
		regs[idx] = voffset[original];
		++idx;
	}
}
|
||||
|
||||
// Returns true when the four entries form an ascending run with a step of
// exactly one, i.e. each entry equals its predecessor plus one.
static bool IsConsecutive4(const u8 regs[4]) {
	for (int i = 1; i < 4; i++) {
		// Same comparison as a chained check: the predecessor is promoted to
		// int before the +1, so no narrowing happens on the right-hand side.
		if (regs[i] != regs[i - 1] + 1)
			return false;
	}
	return true;
}
|
||||
|
||||
void IRFrontend::Comp_VPFX(MIPSOpcode op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
@ -177,7 +186,21 @@ namespace MIPSComp {
|
|||
}
|
||||
|
||||
// Lowers the single-float VFPU load/store opcodes (lv.s / sv.s) to IR.
// Primary opcode 50 emits a LoadFloatV and 58 emits a StoreFloatV; any other
// opcode reaching this function goes through DISABLE.
void IRFrontend::Comp_SV(MIPSOpcode op) {
	// File convention: every Comp_* function starts with CONDITIONAL_DISABLE so
	// problems can be narrowed down to a file quickly. An unconditional DISABLE
	// here would keep the lowering below from ever being reached.
	// NOTE(review): assumes DISABLE leaves the function - its definition is not
	// visible in this view.
	CONDITIONAL_DISABLE;

	// The two low bits of the 16-bit immediate are masked off (they are used for
	// vt below); the remainder is sign-extended to form the address offset.
	s32 offset = (signed short)(op & 0xFFFC);
	// Register number: 5 bits taken from bits 16-20, with the two low opcode
	// bits placed above them as bits 5-6.
	int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	MIPSGPReg rs = _RS;

	switch (op >> 26) {
	case 50: //lv.s
		ir.Write(IROp::LoadFloatV, voffset[vt], rs, ir.AddConstant(offset));
		break;

	case 58: //sv.s
		ir.Write(IROp::StoreFloatV, voffset[vt], rs, ir.AddConstant(offset));
		break;

	default:
		DISABLE;
	}
}
|
||||
|
||||
void IRFrontend::Comp_SVQ(MIPSOpcode op) {
|
||||
|
@ -187,27 +210,32 @@ namespace MIPSComp {
|
|||
|
||||
u8 vregs[4];
|
||||
GetVectorRegs(vregs, V_Quad, vt);
|
||||
ApplyVoffset(vregs, 4); // Translate to memory order
|
||||
|
||||
switch (op >> 26) {
|
||||
case 54: //lv.q
|
||||
{
|
||||
// TODO: Add vector load/store instruction to the IR
|
||||
ir.Write(IROp::LoadFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm));
|
||||
ir.Write(IROp::LoadFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4));
|
||||
ir.Write(IROp::LoadFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8));
|
||||
ir.Write(IROp::LoadFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12));
|
||||
}
|
||||
break;
|
||||
if (IsConsecutive4(vregs)) {
|
||||
ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm));
|
||||
} else {
|
||||
// Let's not even bother with "vertical" loads for now.
|
||||
ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm));
|
||||
ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4));
|
||||
ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8));
|
||||
ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12));
|
||||
}
|
||||
break;
|
||||
|
||||
case 62: //sv.q
|
||||
{
|
||||
// CC might be set by slow path below, so load regs first.
|
||||
ir.Write(IROp::StoreFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm));
|
||||
ir.Write(IROp::StoreFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4));
|
||||
ir.Write(IROp::StoreFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8));
|
||||
ir.Write(IROp::StoreFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12));
|
||||
}
|
||||
break;
|
||||
if (IsConsecutive4(vregs)) {
|
||||
ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm));
|
||||
} else {
|
||||
// Let's not even bother with "vertical" stores for now.
|
||||
ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm));
|
||||
ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4));
|
||||
ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8));
|
||||
ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
DISABLE;
|
||||
|
|
|
@ -236,8 +236,8 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
|
|||
if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified))
|
||||
logBlocks = 1;
|
||||
code = &simplified;
|
||||
if (ir.GetInstructions().size() >= 24)
|
||||
logBlocks = 1;
|
||||
//if (ir.GetInstructions().size() >= 24)
|
||||
// logBlocks = 1;
|
||||
}
|
||||
|
||||
instructions = code->GetInstructions();
|
||||
|
|
|
@ -60,11 +60,13 @@ static const IRMeta irMeta[] = {
|
|||
{ IROp::Load32, "Load32", "GGC" },
|
||||
{ IROp::LoadFloat, "LoadFloat", "FGC" },
|
||||
{ IROp::LoadFloatV, "LoadFloatV", "VGC" },
|
||||
{ IROp::LoadVec4, "LoadVec4", "VGC" },
|
||||
{ IROp::Store8, "Store8", "GGC" },
|
||||
{ IROp::Store16, "Store16", "GGC" },
|
||||
{ IROp::Store32, "Store32", "GGC" },
|
||||
{ IROp::StoreFloat, "StoreFloat", "FGC" },
|
||||
{ IROp::StoreFloatV, "StoreFloatV", "VGC" },
|
||||
{ IROp::StoreVec4, "StoreVec4", "VGC" },
|
||||
{ IROp::FAdd, "FAdd", "FFF" },
|
||||
{ IROp::FSub, "FSub", "FFF" },
|
||||
{ IROp::FMul, "FMul", "FFF" },
|
||||
|
|
|
@ -90,12 +90,14 @@ enum class IROp : u8 {
|
|||
Load32,
|
||||
LoadFloat,
|
||||
LoadFloatV,
|
||||
LoadVec4,
|
||||
|
||||
Store8,
|
||||
Store16,
|
||||
Store32,
|
||||
StoreFloat,
|
||||
StoreFloatV,
|
||||
StoreVec4,
|
||||
|
||||
Ext8to32,
|
||||
Ext16to32,
|
||||
|
@ -212,13 +214,16 @@ enum {
|
|||
IRTEMP_LHS, // Reserved for use in branches
|
||||
IRTEMP_RHS, // Reserved for use in branches
|
||||
|
||||
// 16 float temps for vector S and T prefixes and things like that.
|
||||
// IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0]
|
||||
|
||||
// Hacky way to get to other state
|
||||
IRREG_VPFU_CTRL_BASE = 208,
|
||||
IRREG_VPFU_CC = 211,
|
||||
IRREG_VFPU_CTRL_BASE = 208,
|
||||
IRREG_VFPU_CC = 211,
|
||||
IRREG_LO = 226, // offset of lo in MIPSState / 4
|
||||
IRREG_HI = 227,
|
||||
IRREG_FCR31 = 228,
|
||||
IRREG_FPCOND = 229
|
||||
IRREG_FPCOND = 229,
|
||||
};
|
||||
|
||||
struct IRMeta {
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
#ifdef _M_SSE
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/HLE/HLE.h"
|
||||
#include "Core/HLE/ReplaceTables.h"
|
||||
|
@ -107,6 +111,29 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
|||
Memory::WriteUnchecked_Float(mips->v[inst->src3], mips->r[inst->src1] + constPool[inst->src2]);
|
||||
break;
|
||||
|
||||
case IROp::LoadVec4:
|
||||
{
|
||||
u32 base = mips->r[inst->src1] + constPool[inst->src2];
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->v[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base)));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
mips->v[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case IROp::StoreVec4:
|
||||
{
|
||||
u32 base = mips->r[inst->src1] + constPool[inst->src2];
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->v[inst->dest]));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
Memory::WriteUnchecked_Float(mips->v[inst->dest + i], base + 4 * i);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case IROp::ShlImm:
|
||||
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
|
||||
break;
|
||||
|
|
|
@ -291,6 +291,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
|
|||
break;
|
||||
case IROp::StoreFloat:
|
||||
case IROp::StoreFloatV:
|
||||
case IROp::StoreVec4:
|
||||
if (gpr.IsImm(inst.src1)) {
|
||||
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
|
||||
} else {
|
||||
|
@ -314,6 +315,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
|
|||
break;
|
||||
case IROp::LoadFloat:
|
||||
case IROp::LoadFloatV:
|
||||
case IROp::LoadVec4:
|
||||
if (gpr.IsImm(inst.src1)) {
|
||||
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
|
||||
} else {
|
||||
|
@ -388,7 +390,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
|
|||
goto doDefault;
|
||||
|
||||
case IROp::VfpuCtrlToReg:
|
||||
gpr.MapDirtyIn(inst.dest, IRREG_VPFU_CTRL_BASE + inst.src1);
|
||||
gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1);
|
||||
goto doDefault;
|
||||
|
||||
case IROp::Syscall:
|
||||
|
|
|
@ -86,6 +86,7 @@ enum MIPSGPReg {
|
|||
MIPS_REG_RA=31,
|
||||
|
||||
// Not real regs, just for convenience/jit mapping.
|
||||
// NOTE: These are not the same as the offsets the IR has to use!
|
||||
MIPS_REG_HI = 32,
|
||||
MIPS_REG_LO = 33,
|
||||
MIPS_REG_FPCOND = 34,
|
||||
|
@ -155,7 +156,7 @@ public:
|
|||
|
||||
void DoState(PointerWrap &p);
|
||||
|
||||
// MUST start with r and be followed by f!
|
||||
// MUST start with r and be followed by f, v, and t!
|
||||
u32 r[32];
|
||||
union {
|
||||
float f[32];
|
||||
|
@ -166,23 +167,25 @@ public:
|
|||
float v[128];
|
||||
u32 vi[128];
|
||||
};
|
||||
// Used for temporary variables by IR Interpreter.
|
||||
// Can be indexed through r[] using indices 192+.
|
||||
u32 t[16];
|
||||
|
||||
// Temps don't get flushed so we don't reserve space for them.
|
||||
// Register-allocated JIT Temps don't get flushed so we don't reserve space for them.
|
||||
// However, the IR interpreter needs some temps that can stick around between ops.
|
||||
// Can be indexed through r[] using indices 192+.
|
||||
u32 t[16]; //192
|
||||
// float vt[16]; //208 TODO: VFPU temp
|
||||
|
||||
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
|
||||
u32 vfpuCtrl[16];
|
||||
u32 vfpuCtrl[16]; // 208
|
||||
|
||||
// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
|
||||
u32 padLoHi;
|
||||
u32 padLoHi; // 224
|
||||
|
||||
union {
|
||||
struct {
|
||||
u32 pc;
|
||||
u32 pc; //225
|
||||
|
||||
u32 lo; // offset 192 + 16 + 16 + 1 + 1
|
||||
u32 hi;
|
||||
u32 lo; //226
|
||||
u32 hi; //227
|
||||
|
||||
u32 fcr31; //fpu control register
|
||||
u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23)
|
||||
|
|
Loading…
Add table
Reference in a new issue