irjit: Embed constant inside IRInst.

This simplifies a bunch of code and improves compile performance by about
30%, at the cost of a bit more memory.
Author: Unknown W. Brackets — 2018-01-03 23:19:44 -08:00
Parent commit: 64b57a0329
This commit: cffb2d61a7
9 changed files with 80 additions and 180 deletions

View file

@ -219,7 +219,7 @@ MIPSOpcode IRFrontend::GetOffsetInstruction(int offset) {
return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
}
void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::vector<u32> &constants, u32 &mipsBytes) {
void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes) {
js.cancel = false;
js.blockStart = em_address;
js.compilerPC = em_address;
@ -244,12 +244,6 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
MIPSCompileOp(inst, this);
js.compilerPC += 4;
js.numInstructions++;
if (ir.GetConstants().size() > 64) {
// Need to break the block
ir.Write(IROp::ExitToConst, ir.AddConstant(js.compilerPC));
js.compiling = false;
}
}
mipsBytes = js.compilerPC - em_address;
@ -273,7 +267,6 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
}
instructions = code->GetInstructions();
constants = code->GetConstants();
if (logBlocks > 0 && dontLogBlocks == 0) {
char temp2[256];
@ -286,20 +279,20 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
}
if (logBlocks > 0 && dontLogBlocks == 0) {
NOTICE_LOG(JIT, "=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size());
NOTICE_LOG(JIT, "=============== Original IR (%d instructions) ===============", (int)ir.GetInstructions().size());
for (size_t i = 0; i < ir.GetInstructions().size(); i++) {
char buf[256];
DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], &ir.GetConstants()[0]);
DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i]);
NOTICE_LOG(JIT, "%s", buf);
}
NOTICE_LOG(JIT, "=============== end =================");
}
if (logBlocks > 0 && dontLogBlocks == 0) {
NOTICE_LOG(JIT, "=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size());
NOTICE_LOG(JIT, "=============== IR (%d instructions) ===============", (int)code->GetInstructions().size());
for (size_t i = 0; i < code->GetInstructions().size(); i++) {
char buf[256];
DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], &code->GetConstants()[0]);
DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i]);
NOTICE_LOG(JIT, "%s", buf);
}
NOTICE_LOG(JIT, "=============== end =================");

View file

@ -88,7 +88,7 @@ public:
void DoState(PointerWrap &p);
bool CheckRounding(u32 blockAddress); // returns true if we need a do-over
void DoJit(u32 em_address, std::vector<IRInst> &instructions, std::vector<u32> &constants, u32 &mipsBytes);
void DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes);
void EatPrefix() override {
js.EatPrefix();

View file

@ -174,7 +174,10 @@ void IRWriter::Write(IROp op, u8 dst, u8 src1, u8 src2) {
inst.dest = dst;
inst.src1 = src1;
inst.src2 = src2;
inst.constant = nextConst_;
insts_.push_back(inst);
nextConst_ = 0;
}
void IRWriter::WriteSetConstant(u8 dst, u32 value) {
@ -182,16 +185,8 @@ void IRWriter::WriteSetConstant(u8 dst, u32 value) {
}
int IRWriter::AddConstant(u32 value) {
for (size_t i = 0; i < constPool_.size(); i++) {
if (constPool_[i] == value)
return (int)i;
}
constPool_.push_back(value);
if (constPool_.size() > 255) {
// Cannot have more than 256 constants in a block!
Crash();
}
return (int)constPool_.size() - 1;
nextConst_ = value;
return 255;
}
int IRWriter::AddConstantFloat(float value) {
@ -215,7 +210,7 @@ const char *GetGPRName(int r) {
}
}
void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *constPool) {
void DisassembleParam(char *buf, int bufSize, u8 param, char type, u32 constant) {
static const char *vfpuCtrlNames[VFPU_CTRL_MAX] = {
"SPFX",
"TPFX",
@ -271,7 +266,7 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
}
break;
case 'C':
snprintf(buf, bufSize, "%08x", constPool[param]);
snprintf(buf, bufSize, "%08x", constant);
break;
case 'I':
snprintf(buf, bufSize, "%02x", param);
@ -302,7 +297,7 @@ const IRMeta *GetIRMeta(IROp op) {
return metaIndex[(int)op];
}
void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool) {
void DisassembleIR(char *buf, size_t bufsize, IRInst inst) {
const IRMeta *meta = GetIRMeta(inst.op);
if (!meta) {
snprintf(buf, bufsize, "Unknown %d", (int)inst.op);
@ -311,9 +306,9 @@ void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool)
char bufDst[16];
char bufSrc1[16];
char bufSrc2[16];
DisassembleParam(bufDst, sizeof(bufDst) - 2, inst.dest, meta->types[0], constPool);
DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.src1, meta->types[1], constPool);
DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.src2, meta->types[2], constPool);
DisassembleParam(bufDst, sizeof(bufDst) - 2, inst.dest, meta->types[0], inst.constant);
DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.src1, meta->types[1], inst.constant);
DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.src2, meta->types[2], inst.constant);
if (meta->types[1] && meta->types[0] != '_') {
strcat(bufDst, ", ");
}

View file

@ -313,10 +313,7 @@ struct IRMeta {
u32 flags;
};
// 32 bits.
// TODO: Evaluate whether it would make sense to switch to 64-bit ops with immediates
// included instead of storing immediates separately. Would simplify things at some memory
// storage and bandwidth cost.
// 64 bits.
struct IRInst {
IROp op;
union {
@ -325,22 +322,21 @@ struct IRInst {
};
u8 src1;
u8 src2;
u32 constant;
};
// Returns the new PC.
u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count);
u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count);
// Each IR block gets a constant pool.
class IRWriter {
public:
IRWriter &operator =(const IRWriter &w) {
insts_ = w.insts_;
constPool_ = w.constPool_;
return *this;
}
IRWriter &operator =(IRWriter &&w) {
insts_ = std::move(w.insts_);
constPool_ = std::move(w.constPool_);
return *this;
}
@ -355,15 +351,13 @@ public:
void Clear() {
insts_.clear();
constPool_.clear();
}
const std::vector<IRInst> &GetInstructions() const { return insts_; }
const std::vector<u32> &GetConstants() const { return constPool_; }
private:
std::vector<IRInst> insts_;
std::vector<u32> constPool_;
u32 nextConst_ = 0;
};
struct IROptions {
@ -371,5 +365,5 @@ struct IROptions {
};
const IRMeta *GetIRMeta(IROp op);
void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool);
void DisassembleIR(char *buf, size_t bufsize, IRInst inst);
void InitIR();

View file

@ -58,7 +58,7 @@ u32 RunMemCheck(u32 pc, u32 addr) {
return coreState != CORE_RUNNING ? 1 : 0;
}
u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) {
u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
const IRInst *end = inst + count;
while (inst != end) {
switch (inst->op) {
@ -66,10 +66,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
_assert_(false);
break;
case IROp::SetConst:
mips->r[inst->dest] = constPool[inst->src1];
mips->r[inst->dest] = inst->constant;
break;
case IROp::SetConstF:
memcpy(&mips->f[inst->dest], &constPool[inst->src1], 4);
memcpy(&mips->f[inst->dest], &inst->constant, 4);
break;
case IROp::Add:
mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2];
@ -90,19 +90,19 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
mips->r[inst->dest] = mips->r[inst->src1];
break;
case IROp::AddConst:
mips->r[inst->dest] = mips->r[inst->src1] + constPool[inst->src2];
mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
break;
case IROp::SubConst:
mips->r[inst->dest] = mips->r[inst->src1] - constPool[inst->src2];
mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
break;
case IROp::AndConst:
mips->r[inst->dest] = mips->r[inst->src1] & constPool[inst->src2];
mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
break;
case IROp::OrConst:
mips->r[inst->dest] = mips->r[inst->src1] | constPool[inst->src2];
mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
break;
case IROp::XorConst:
mips->r[inst->dest] = mips->r[inst->src1] ^ constPool[inst->src2];
mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
break;
case IROp::Neg:
mips->r[inst->dest] = -(s32)mips->r[inst->src1];
@ -121,40 +121,40 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
break;
case IROp::Load8:
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]);
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
break;
case IROp::Load8Ext:
mips->r[inst->dest] = (s32)(s8)Memory::ReadUnchecked_U8(mips->r[inst->src1] + constPool[inst->src2]);
mips->r[inst->dest] = (s32)(s8)Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
break;
case IROp::Load16:
mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]);
mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
break;
case IROp::Load16Ext:
mips->r[inst->dest] = (s32)(s16)Memory::ReadUnchecked_U16(mips->r[inst->src1] + constPool[inst->src2]);
mips->r[inst->dest] = (s32)(s16)Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
break;
case IROp::Load32:
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + constPool[inst->src2]);
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
break;
case IROp::LoadFloat:
mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + constPool[inst->src2]);
mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant);
break;
case IROp::Store8:
Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]);
Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
break;
case IROp::Store16:
Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]);
Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
break;
case IROp::Store32:
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + constPool[inst->src2]);
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
break;
case IROp::StoreFloat:
Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + constPool[inst->src2]);
Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant);
break;
case IROp::LoadVec4:
{
u32 base = mips->r[inst->src1] + constPool[inst->src2];
u32 base = mips->r[inst->src1] + inst->constant;
#if defined(_M_SSE)
_mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base)));
#else
@ -165,7 +165,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
}
case IROp::StoreVec4:
{
u32 base = mips->r[inst->src1] + constPool[inst->src2];
u32 base = mips->r[inst->src1] + inst->constant;
#if defined(_M_SSE)
_mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest]));
#else
@ -474,11 +474,11 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
break;
case IROp::SltConst:
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)constPool[inst->src2];
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant;
break;
case IROp::SltUConst:
mips->r[inst->dest] = mips->r[inst->src1] < constPool[inst->src2];
mips->r[inst->dest] = mips->r[inst->src1] < inst->constant;
break;
case IROp::MovZ:
@ -731,34 +731,34 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
break;
case IROp::ExitToConst:
return constPool[inst->dest];
return inst->constant;
case IROp::ExitToReg:
return mips->r[inst->src1];
case IROp::ExitToConstIfEq:
if (mips->r[inst->src1] == mips->r[inst->src2])
return constPool[inst->dest];
return inst->constant;
break;
case IROp::ExitToConstIfNeq:
if (mips->r[inst->src1] != mips->r[inst->src2])
return constPool[inst->dest];
return inst->constant;
break;
case IROp::ExitToConstIfGtZ:
if ((s32)mips->r[inst->src1] > 0)
return constPool[inst->dest];
return inst->constant;
break;
case IROp::ExitToConstIfGeZ:
if ((s32)mips->r[inst->src1] >= 0)
return constPool[inst->dest];
return inst->constant;
break;
case IROp::ExitToConstIfLtZ:
if ((s32)mips->r[inst->src1] < 0)
return constPool[inst->dest];
return inst->constant;
break;
case IROp::ExitToConstIfLeZ:
if ((s32)mips->r[inst->src1] <= 0)
return constPool[inst->dest];
return inst->constant;
break;
case IROp::Downcount:
@ -770,13 +770,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
break;
case IROp::SetPCConst:
mips->pc = constPool[inst->src1];
mips->pc = inst->constant;
break;
case IROp::Syscall:
// IROp::SetPC was (hopefully) executed before.
{
MIPSOpcode op(constPool[inst->src1]);
MIPSOpcode op(inst->constant);
CallSyscall(op);
if (coreState != CORE_RUNNING)
CoreTiming::ForceCheck();
@ -788,14 +788,14 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
case IROp::Interpret: // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging.
{
MIPSOpcode op(constPool[inst->src1]);
MIPSOpcode op(inst->constant);
MIPSInterpret(op);
break;
}
case IROp::CallReplacement:
{
int funcIndex = constPool[inst->src1];
int funcIndex = inst->constant;
const ReplacementTableEntry *f = GetReplacementFunc(funcIndex);
int cycles = f->replaceFunc();
mips->downcount -= cycles;
@ -810,7 +810,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
return mips->pc + 4;
case IROp::SetCtrlVFPU:
mips->vfpuCtrl[inst->dest] = constPool[inst->src1];
mips->vfpuCtrl[inst->dest] = inst->constant;
break;
case IROp::SetCtrlVFPUReg:
@ -829,7 +829,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
break;
case IROp::MemoryCheck:
if (RunMemCheck(mips->pc, mips->r[inst->src1] + constPool[inst->src2])) {
if (RunMemCheck(mips->pc, mips->r[inst->src1] + inst->constant)) {
CoreTiming::ForceCheck();
return mips->pc;
}

View file

@ -20,4 +20,4 @@ inline static u32 ReverseBits32(u32 v) {
return v;
}
u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count);
u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count);

View file

@ -70,10 +70,9 @@ void IRJit::Compile(u32 em_address) {
IRBlock *b = blocks_.GetBlock(block_num);
std::vector<IRInst> instructions;
std::vector<u32> constants;
u32 mipsBytes;
frontend_.DoJit(em_address, instructions, constants, mipsBytes);
b->SetInstructions(instructions, constants);
frontend_.DoJit(em_address, instructions, mipsBytes);
b->SetInstructions(instructions);
b->SetOriginalSize(mipsBytes);
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num);
@ -104,7 +103,7 @@ void IRJit::RunLoopUntil(u64 globalticks) {
if (opcode == MIPS_EMUHACK_OPCODE) {
u32 data = inst & 0xFFFFFF;
IRBlock *block = blocks_.GetBlock(data);
mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions());
mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetNumInstructions());
} else {
// RestoreRoundingMode(true);
Compile(mips_->pc);

View file

@ -38,40 +38,30 @@ namespace MIPSComp {
// TODO : Use arena allocators. For now let's just malloc.
class IRBlock {
public:
IRBlock() : instr_(nullptr), const_(nullptr), numInstructions_(0), numConstants_(0), origAddr_(0), origSize_(0) {}
IRBlock(u32 emAddr) : instr_(nullptr), const_(nullptr), numInstructions_(0), numConstants_(0), origAddr_(emAddr), origSize_(0) {}
IRBlock() : instr_(nullptr), numInstructions_(0), origAddr_(0), origSize_(0) {}
IRBlock(u32 emAddr) : instr_(nullptr), numInstructions_(0), origAddr_(emAddr), origSize_(0) {}
IRBlock(IRBlock &&b) {
instr_ = b.instr_;
const_ = b.const_;
numInstructions_ = b.numInstructions_;
numConstants_ = b.numConstants_;
origAddr_ = b.origAddr_;
origSize_ = b.origSize_;
origFirstOpcode_ = b.origFirstOpcode_;
b.instr_ = nullptr;
b.const_ = nullptr;
}
~IRBlock() {
delete[] instr_;
delete[] const_;
}
void SetInstructions(const std::vector<IRInst> &inst, const std::vector<u32> &constants) {
void SetInstructions(const std::vector<IRInst> &inst) {
instr_ = new IRInst[inst.size()];
numInstructions_ = (u16)inst.size();
if (!inst.empty()) {
memcpy(instr_, &inst[0], sizeof(IRInst) * inst.size());
}
const_ = new u32[constants.size()];
numConstants_ = (u16)constants.size();
if (!constants.empty()) {
memcpy(const_, &constants[0], sizeof(u32) * constants.size());
}
}
const IRInst *GetInstructions() const { return instr_; }
const u32 *GetConstants() const { return const_; }
int GetNumInstructions() const { return numInstructions_; }
MIPSOpcode GetOriginalFirstOp() const { return origFirstOpcode_; }
bool HasOriginalFirstOp();
@ -92,9 +82,7 @@ public:
private:
IRInst *instr_;
u32 *const_;
u16 numInstructions_;
u16 numConstants_;
u32 origAddr_;
u32 origSize_;
MIPSOpcode origFirstOpcode_;

View file

@ -7,43 +7,6 @@
#include "Core/MIPS/IR/IRPassSimplify.h"
#include "Core/MIPS/IR/IRRegCache.h"
void WriteInstWithConstants(const IRWriter &in, IRWriter &out, const u32 *constants, IRInst inst) {
// Remap constants to the new reality
const IRMeta *m = GetIRMeta(inst.op);
if (!m) {
ERROR_LOG(CPU, "Bad IR instruction %02x", (int)inst.op);
return;
}
switch (m->types[0]) {
case 'C':
if (!constants) {
ERROR_LOG(CPU, "Missing constant for type 0");
return;
}
inst.dest = out.AddConstant(constants[inst.dest]);
break;
}
switch (m->types[1]) {
case 'C':
if (!constants) {
ERROR_LOG(CPU, "Missing constants for type 1");
return;
}
inst.src1 = out.AddConstant(constants[inst.src1]);
break;
}
switch (m->types[2]) {
case 'C':
if (!constants) {
ERROR_LOG(CPU, "Missing constants for type 2");
return;
}
inst.src2 = out.AddConstant(constants[inst.src2]);
break;
}
out.Write(inst);
}
u32 Evaluate(u32 a, u32 b, IROp op) {
switch (op) {
case IROp::Add: case IROp::AddConst: return a + b;
@ -137,13 +100,8 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri
}
bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts) {
const u32 *constants = !in.GetConstants().empty() ? &in.GetConstants()[0] : nullptr;
bool logBlocks = false;
IRInst prev;
prev.op = IROp::Nop;
prev.dest = 0;
prev.src1 = 0;
prev.src2 = 0;
IRInst prev{ IROp::Nop };
for (int i = 0; i < (int)in.GetInstructions().size(); i++) {
IRInst inst = in.GetInstructions()[i];
switch (inst.op) {
@ -172,7 +130,7 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts) {
// AddConst a0, sp, 0x30
// LoadVec4 v16, sp, 0x30
if (prev.op == IROp::AddConst && prev.dest == inst.src1 && prev.dest != prev.src1 && prev.src1 == MIPS_REG_SP) {
inst.src2 = out.AddConstant(constants[prev.src2] + constants[inst.src2]);
inst.constant += prev.constant;
inst.src1 = prev.src1;
logBlocks = 1;
} else {
@ -182,7 +140,7 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts) {
break;
*/
default:
WriteInstWithConstants(in, out, constants, inst);
out.Write(inst);
break;
}
prev = inst;
@ -238,28 +196,19 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts) {
break;
}
}
// Can reuse the old constants array - not touching constants in this pass.
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
return logBlocks;
}
bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts) {
IRRegCache gpr(&out);
const u32 *constants = !in.GetConstants().empty() ? &in.GetConstants()[0] : nullptr;
bool logBlocks = false;
for (int i = 0; i < (int)in.GetInstructions().size(); i++) {
IRInst inst = in.GetInstructions()[i];
bool symmetric = true;
if (out.GetConstants().size() > 128) {
// Avoid causing a constant explosion.
goto doDefaultAndFlush;
}
switch (inst.op) {
case IROp::SetConst:
gpr.SetImm(inst.dest, constants[inst.src1]);
gpr.SetImm(inst.dest, inst.constant);
break;
case IROp::SetConstF:
goto doDefault;
@ -328,7 +277,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
case IROp::SltConst:
case IROp::SltUConst:
if (gpr.IsImm(inst.src1)) {
gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), constants[inst.src2], inst.op));
gpr.SetImm(inst.dest, Evaluate(gpr.GetImm(inst.src1), inst.constant, inst.op));
} else {
gpr.MapDirtyIn(inst.dest, inst.src1);
goto doDefault;
@ -428,7 +377,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
case IROp::Store32:
if (gpr.IsImm(inst.src1) && inst.src1 != inst.dest) {
gpr.MapIn(inst.dest);
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + inst.constant));
} else {
gpr.MapInIn(inst.dest, inst.src1);
goto doDefault;
@ -437,7 +386,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
case IROp::StoreFloat:
case IROp::StoreVec4:
if (gpr.IsImm(inst.src1)) {
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + inst.constant));
} else {
gpr.MapIn(inst.src1);
goto doDefault;
@ -451,7 +400,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
case IROp::Load32:
if (gpr.IsImm(inst.src1) && inst.src1 != inst.dest) {
gpr.MapDirty(inst.dest);
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + inst.constant));
} else {
gpr.MapDirtyIn(inst.dest, inst.src1);
goto doDefault;
@ -460,7 +409,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
case IROp::LoadFloat:
case IROp::LoadVec4:
if (gpr.IsImm(inst.src1)) {
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2]));
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + inst.constant));
} else {
gpr.MapIn(inst.src1);
goto doDefault;
@ -581,10 +530,9 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
case IROp::MemoryCheck:
default:
{
doDefaultAndFlush:
gpr.FlushAll();
doDefault:
WriteInstWithConstants(in, out, constants, inst);
out.Write(inst);
break;
}
}
@ -698,9 +646,6 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) {
}
}
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
for (const IRInst &inst : insts) {
if (inst.op != IROp::Mov || inst.dest != 0 || inst.src1 != 0) {
out.Write(inst);
@ -711,10 +656,6 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) {
}
bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts) {
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
// This tells us to skip an AND op that has been optimized out.
// Maybe we could skip multiple, but that'd slow things down and is pretty uncommon.
int nextSkip = -1;
@ -735,7 +676,7 @@ bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts) {
}
if (IRReadsFromGPR(laterInst, dest)) {
if (IRDestGPR(laterInst) == dest && laterInst.op == IROp::AndConst) {
const u32 mask = in.GetConstants()[laterInst.src2];
const u32 mask = laterInst.constant;
// Here we are, maybe we can reduce the load size based on the mask.
if ((mask & 0xffffff00) == 0) {
inst.op = IROp::Load8;
@ -767,7 +708,7 @@ bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts) {
return logBlocks;
}
static std::vector<IRInst> ReorderLoadStoreOps(std::vector<IRInst> &ops, const u32 *consts) {
static std::vector<IRInst> ReorderLoadStoreOps(std::vector<IRInst> &ops) {
if (ops.size() < 2) {
return ops;
}
@ -838,7 +779,7 @@ static std::vector<IRInst> ReorderLoadStoreOps(std::vector<IRInst> &ops, const u
size_t end = j;
if (start + 1 < end) {
std::stable_sort(ops.begin() + start, ops.begin() + end, [&](const IRInst &a, const IRInst &b) {
return consts[a.src2] < consts[b.src2];
return a.constant < b.constant;
});
}
}
@ -866,7 +807,7 @@ bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts)
}
std::vector<IRInst> loadStoreUnsorted = loadStoreQueue;
std::vector<IRInst> loadStoreSorted = ReorderLoadStoreOps(loadStoreQueue, &in.GetConstants()[0]);
std::vector<IRInst> loadStoreSorted = ReorderLoadStoreOps(loadStoreQueue);
if (memcmp(&loadStoreSorted[0], &loadStoreUnsorted[0], sizeof(IRInst) * loadStoreSorted.size()) != 0) {
logBlocks = true;
}
@ -1034,11 +975,6 @@ bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts)
break;
}
}
// Can reuse the old constants array - not touching constants in this pass.
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
return logBlocks;
}
@ -1050,8 +986,8 @@ bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) {
// Not similar enough at all.
return false;
}
u32 off1 = in.GetConstants()[a.src2];
u32 off2 = in.GetConstants()[b.src2];
u32 off1 = a.constant;
u32 off2 = b.constant;
if (off1 + dist != off2) {
// Not immediately sequential.
return false;
@ -1172,10 +1108,5 @@ bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) {
break;
}
}
// Can reuse the old constants array - not touching constants in this pass.
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
return logBlocks;
}