softjit: Add dithering.

This commit is contained in:
Unknown W. Brackets 2021-11-22 15:59:41 -08:00
parent bd99448863
commit 0e63b357b3
3 changed files with 147 additions and 15 deletions

View file

@ -577,14 +577,13 @@ void PixelRegCache::Reset() {
}
void PixelRegCache::Release(PixelRegCache::Reg r, PixelRegCache::Type t, PixelRegCache::Purpose p) {
for (auto &reg : regs) {
if (reg.reg == r && reg.type == t) {
_assert_msg_(reg.locked > 0, "softjit Release() reg that isn't locked");
_assert_msg_(!reg.forceLocked, "softjit Release() reg that is force locked");
reg.purpose = p;
reg.locked--;
return;
}
RegStatus *status = FindReg(r, t);
if (status) {
_assert_msg_(status->locked > 0, "softjit Release() reg that isn't locked");
_assert_msg_(!status->forceLocked, "softjit Release() reg that is force locked");
status->purpose = p;
status->locked--;
return;
}
RegStatus newStatus;
@ -595,12 +594,11 @@ void PixelRegCache::Release(PixelRegCache::Reg r, PixelRegCache::Type t, PixelRe
}
void PixelRegCache::Unlock(PixelRegCache::Reg r, PixelRegCache::Type t) {
for (auto &reg : regs) {
if (reg.reg == r && reg.type == t) {
_assert_msg_(reg.locked > 0, "softjit Unlock() reg that isn't locked");
reg.locked--;
return;
}
RegStatus *status = FindReg(r, t);
if (status) {
_assert_msg_(status->locked > 0, "softjit Unlock() reg that isn't locked");
status->locked--;
return;
}
_assert_msg_(false, "softjit Unlock() reg that isn't there");
@ -664,4 +662,49 @@ void PixelRegCache::ForceLock(PixelRegCache::Purpose p, PixelRegCache::Type t, b
_assert_msg_(false, "softjit ForceLock() reg that isn't there");
}
// Claims the specific register r (of type t) for purpose p.
//
// If r is currently free (not locked and not force locked), it is simply locked
// for p and needsSwap is set to false.
//
// Otherwise needsSwap is set to true and the bookkeeping that described r
// (purpose, locked count, forceLocked) is moved onto swapReg: either onto the
// existing entry for swapReg, or onto a newly appended entry when swapReg is
// not tracked yet.  r is then locked once for p.  Only the bookkeeping is
// updated here; no code is emitted by this function.
//
// Asserts (and leaves needsSwap untouched) when r is not tracked at all.
void PixelRegCache::GrabReg(PixelRegCache::Reg r, PixelRegCache::Purpose p, PixelRegCache::Type t, bool &needsSwap, PixelRegCache::Reg swapReg) {
	RegStatus *status = FindReg(r, t);
	if (!status) {
		_assert_msg_(false, "softjit GrabReg() reg that isn't there");
		return;
	}

	// Easy version, it's free.
	if (status->locked == 0 && !status->forceLocked) {
		needsSwap = false;
		status->purpose = p;
		status->locked = 1;
		return;
	}

	// Okay, we need to swap.  Snapshot the current state first, relabeled as swapReg.
	needsSwap = true;
	RegStatus displaced = *status;
	displaced.reg = swapReg;

	RegStatus *swap = FindReg(swapReg, t);
	if (swap) {
		swap->purpose = displaced.purpose;
		swap->forceLocked = displaced.forceLocked;
		swap->locked = displaced.locked;
	}

	status->purpose = p;
	status->locked = 1;
	status->forceLocked = false;

	// Append last: push_back may reallocate regs, which would invalidate status
	// (and any reference obtained while iterating regs).
	if (!swap)
		regs.push_back(displaced);
}
// Returns a pointer to the first tracked entry whose register and type match
// r and t, or nullptr when there is no such entry.
// The pointer refers to an element of regs, so it stops being valid if regs grows.
PixelRegCache::RegStatus *PixelRegCache::FindReg(PixelRegCache::Reg r, PixelRegCache::Type t) {
	for (auto &status : regs) {
		if (status.reg == r && status.type == t) {
			return &status;
		}
	}
	return nullptr;
}
};

View file

@ -106,7 +106,12 @@ struct PixelRegCache {
Reg Alloc(Purpose p, Type t);
void ForceLock(Purpose p, Type t, bool state = true);
// For getting a specific reg. WARNING: May return a locked reg, so you have to check.
void GrabReg(Reg r, Purpose p, Type t, bool &needsSwap, Reg swapReg);
private:
RegStatus *FindReg(Reg r, Type t);
std::vector<RegStatus> regs;
};
@ -156,6 +161,8 @@ private:
bool Jit_WriteStencilOnly(const PixelFuncID &id, PixelRegCache::Reg stencilReg);
bool Jit_DepthTest(const PixelFuncID &id);
bool Jit_WriteDepth(const PixelFuncID &id);
bool Jit_AlphaBlend(const PixelFuncID &id);
bool Jit_Dither(const PixelFuncID &id);
std::unordered_map<PixelFuncID, SingleFunc> cache_;
std::unordered_map<PixelFuncID, const u8 *> addresses_;

View file

@ -111,9 +111,11 @@ SingleFunc PixelJitCache::CompileSingle(const PixelFuncID &id) {
success = success && Jit_StencilAndDepthTest(id);
else if (!id.clearMode)
success = success && Jit_DepthTest(id);
success = success && Jit_WriteDepth(id);
success = success && Jit_AlphaBlend(id);
success = success && Jit_Dither(id);
// TODO: There's more...
success = false;
@ -929,6 +931,86 @@ bool PixelJitCache::Jit_WriteDepth(const PixelFuncID &id) {
return true;
}
// Alpha blend stage.  Nothing is emitted yet, so this only reports success
// when the function ID has alpha blending disabled.
bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
	// TODO: Will need old color in some cases, too.
	return !id.alphaBlend;
}
// Emits code that adds the dither matrix entry for (x, y) to the RGB channels
// of the color in argColorReg.  Emits nothing when id.dithering is off.
//
// The emitted sequence:
//   1. loads the 16-bit dither row for (y & 3) from gstate's dithmtx,
//   2. shifts it right by (x & 3) * 4 and masks to get the 4-bit entry,
//   3. sign extends that entry to 16 bits,
//   4. broadcasts it to the R, G and B 16-bit lanes (A lane stays 0),
//   5. widens the color to 16-bit lanes, does a saturating signed add, and
//      packs back to 8-bit with unsigned saturation.
//
// argXReg and argYReg are consumed (modified and released) along the way.
// Always returns true.
bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
	if (!id.dithering)
		return true;

	X64Reg gstateReg = GetGState();
	X64Reg valueReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);

	// Load the row dither matrix entry (will still need to get the X.)
	MOV(32, R(valueReg), R(argYReg));
	// Mask the full 32 bits: an 8-bit AND would leave bits 8-31 of Y in the index.
	AND(32, R(valueReg), Imm32(3));
	MOVZX(32, 16, valueReg, MComplex(gstateReg, valueReg, 4, offsetof(GPUgstate, dithmtx)));
	regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);

	// At this point, we're done with depth and y, so let's grab COLOR_OFF and lock it.
	// Then we can modify x and throw it away too, which is our actual goal.
	regCache_.Unlock(GetColorOff(id), PixelRegCache::T_GEN);
	regCache_.ForceLock(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN);
	regCache_.Release(argYReg, PixelRegCache::T_GEN);

	// Turn x into a bit offset within the row: (x & 3) * 4.
	AND(32, R(argXReg), Imm32(3));
	SHL(32, R(argXReg), Imm8(2));

	// The variable shift below takes its count from CL.
	// Conveniently, this is ECX on Windows, but otherwise we need to swap it.
	if (argXReg != RCX) {
		bool needsSwap = false;
		regCache_.GrabReg(RCX, PixelRegCache::TEMP1, PixelRegCache::T_GEN, needsSwap, argXReg);

		if (needsSwap) {
			XCHG(PTRBITS, R(RCX), R(argXReg));
			// Whatever lived in RCX now lives in argXReg, including our value.
			if (valueReg == RCX)
				valueReg = argXReg;
		} else {
			MOV(32, R(RCX), R(argXReg));
			regCache_.Release(argXReg, PixelRegCache::T_GEN);
		}
	}

	// Okay shift to the specific value to add.
	SHR(32, R(valueReg), R(CL));
	AND(16, R(valueReg), Imm16(0x000F));

	// This will either be argXReg on Windows, or RCX we explicitly grabbed.
	regCache_.Release(RCX, PixelRegCache::T_GEN);

	// Now we need to make 0-7 positive, 8-F negative.. so sign extend.
	// Extend to the full 16-bit lane (not just 8 bits), since the add below is
	// a signed 16-bit add.  The 16-bit SAR leaves the high 16 bits as zero.
	SHL(32, R(valueReg), Imm8(12));
	SAR(16, R(valueReg), Imm8(12));

	// Copy that value into a vec to add to the color.
	X64Reg vecValueReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
	MOVD_xmm(vecValueReg, R(valueReg));
	regCache_.Release(valueReg, PixelRegCache::T_GEN);

	// Now we want to broadcast RGB in 16-bit, but keep A as 0.
	// Luckily, we know that second lane (in 16-bit) is zero from valueReg's high 16 bits.
	// We use 16-bit because we need a signed add, but we also want to saturate.
	PSHUFLW(vecValueReg, R(vecValueReg), _MM_SHUFFLE(1, 0, 0, 0));

	// With that, now let's convert the color to 16 bit...
	X64Reg zeroReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
	PXOR(zeroReg, R(zeroReg));
	PUNPCKLBW(argColorReg, R(zeroReg));
	regCache_.Release(zeroReg, PixelRegCache::T_VEC);

	// And simply add the dither values.
	PADDSW(argColorReg, R(vecValueReg));
	regCache_.Release(vecValueReg, PixelRegCache::T_VEC);

	// Now that we're done, put color back in 4x8-bit.
	PACKUSWB(argColorReg, R(argColorReg));

	return true;
}
};
#endif