samplerjit: Lookup both mip tex values.

This commit is contained in:
Unknown W. Brackets 2021-12-28 16:22:54 -08:00
parent 6b55d328e5
commit 940e6bb1d7
4 changed files with 94 additions and 37 deletions

View file

@ -520,7 +520,6 @@ Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &sourc
template <bool mayHaveMipLevels>
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(Sampler::Funcs sampler, Vec4IntArg prim_color, float s, float t, int texlevel, int frac_texlevel, bool bilinear, u8 *texptr[], int texbufw[], int x, int y) {
Vec4<int> texcolor0;
Vec4<int> texcolor1;
const u8 **tptr0 = const_cast<const u8 **>(&texptr[mayHaveMipLevels ? texlevel : 0]);
const int *bufw0 = &texbufw[mayHaveMipLevels ? texlevel : 0];
@ -535,18 +534,13 @@ static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(Sampler::Funcs sampler,
texcolor0 = Vec4<int>(sampler.nearest(u[0], v[0], tptr0[0], bufw0[0], mayHaveMipLevels ? texlevel : 0));
if (mayHaveMipLevels && frac_texlevel) {
texcolor1 = Vec4<int>(sampler.nearest(u[1], v[1], tptr0[1], bufw0[1], texlevel + 1));
Vec4<int> texcolor1 = Vec4<int>(sampler.nearest(u[1], v[1], tptr0[1], bufw0[1], texlevel + 1));
texcolor0 = (texcolor1 * frac_texlevel + texcolor0 * (16 - frac_texlevel)) / 16;
}
} else {
texcolor0 = Vec4<int>(sampler.linear(s, t, x, y, prim_color, tptr0, bufw0, mayHaveMipLevels ? texlevel : 0, mayHaveMipLevels ? frac_texlevel : 0));
if (mayHaveMipLevels && frac_texlevel) {
texcolor1 = Vec4<int>(sampler.linear(s, t, x, y, prim_color, tptr0 + 1, bufw0 + 1, texlevel + 1, 0));
}
}
if (mayHaveMipLevels && frac_texlevel) {
texcolor0 = (texcolor1 * frac_texlevel + texcolor0 * (16 - frac_texlevel)) / 16;
}
return GetTextureFunctionOutput(prim_color, ToVec4IntArg(texcolor0));
}

View file

@ -557,7 +557,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, fl
return ApplyTexelClampQuadT(gstate.isTexCoordClampedT(), base_v, height);
}
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 **tptr, const int *bufw, int texlevel, int levelFrac) {
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 **tptr, const int *bufw, int texlevel) {
int frac_u, frac_v;
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x);
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y);
@ -572,4 +572,13 @@ static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y,
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) / (16 * 16));
}
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 **tptr, const int *bufw, int texlevel, int levelFrac) {
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel);
if (levelFrac) {
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1);
c0 = (c1 * levelFrac + c0 * (16 - levelFrac)) / 16;
}
return ToVec4IntResult(c0);
}
};

View file

@ -91,7 +91,7 @@ private:
bool Jit_PrepareDataOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg);
bool Jit_PrepareDataDirectOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, int bitsPerTexel);
bool Jit_PrepareDataSwizzledOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, int bitsPerTexel);
bool Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::Reg destReg, bool level1);
bool Jit_BlendQuad(const SamplerID &id, bool level1);
#if PPSSPP_ARCH(ARM64)
Arm64Gen::ARM64FloatEmitter fp;

View file

@ -265,6 +265,8 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
regCache_.ChangeReg(XMM7, RegCache::VEC_V1);
regCache_.ForceRetain(RegCache::VEC_U1);
regCache_.ForceRetain(RegCache::VEC_V1);
} else if (regCache_.Has(RegCache::GEN_ARG_LEVELFRAC)) {
regCache_.ForceRelease(RegCache::GEN_ARG_LEVELFRAC);
}
// Save prim color for later in a different XMM too.
@ -441,8 +443,52 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
if (regCache_.Has(RegCache::GEN_ARG_LEVEL))
regCache_.ForceRelease(RegCache::GEN_ARG_LEVEL);
// TODO: Convert to reg cache.
success = success && Jit_BlendQuad(id, XMM0, false);
success = success && Jit_BlendQuad(id, false);
if (id.hasAnyMips) {
if (!regCache_.Has(RegCache::GEN_ARG_LEVELFRAC)) {
X64Reg levelFracReg = regCache_.Alloc(RegCache::GEN_ARG_LEVELFRAC);
MOVZX(32, 8, levelFracReg, MDisp(RSP, stackArgPos_ + 24));
regCache_.Unlock(levelFracReg, RegCache::GEN_ARG_LEVELFRAC);
regCache_.ForceRetain(RegCache::GEN_ARG_LEVELFRAC);
}
X64Reg levelFracReg = regCache_.Find(RegCache::GEN_ARG_LEVELFRAC);
CMP(8, R(levelFracReg), Imm8(0));
FixupBranch skip = J_CC(CC_Z, true);
success = success && Jit_BlendQuad(id, true);
// First, broadcast the levelFrac value into an XMM.
X64Reg fracReg = regCache_.Alloc(RegCache::VEC_TEMP0);
MOVD_xmm(fracReg, R(levelFracReg));
PSHUFLW(fracReg, R(fracReg), _MM_SHUFFLE(0, 0, 0, 0));
regCache_.Unlock(levelFracReg, RegCache::GEN_ARG_LEVELFRAC);
regCache_.ForceRelease(RegCache::GEN_ARG_LEVELFRAC);
// Multiply level1 color by the fraction.
X64Reg color1Reg = regCache_.Find(RegCache::VEC_RESULT1);
PMULLW(color1Reg, R(fracReg));
// Okay, next we need an inverse for color 0.
X64Reg invFracReg = regCache_.Alloc(RegCache::VEC_TEMP1);
MOVDQA(invFracReg, M(const10All_));
PSUBW(invFracReg, R(fracReg));
// And multiply.
PMULLW(XMM0, R(invFracReg));
regCache_.Release(fracReg, RegCache::VEC_TEMP0);
regCache_.Release(invFracReg, RegCache::VEC_TEMP1);
// Okay, now sum and divide by 16 (which is what the fraction maxed at.)
PADDW(XMM0, R(color1Reg));
PSRLW(XMM0, 4);
// And now we're done with color1Reg/VEC_RESULT1.
regCache_.Unlock(color1Reg, RegCache::VEC_RESULT1);
regCache_.ForceRelease(RegCache::VEC_RESULT1);
SetJumpTarget(skip);
}
// Last of all, convert to 32-bit channels.
if (cpu_info.bSSE4_1) {
@ -453,9 +499,10 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
// TODO: Actually use these at some point.
regCache_.ForceRelease(RegCache::VEC_ARG_COLOR);
regCache_.ForceRelease(RegCache::VEC_RESULT);
if (regCache_.Has(RegCache::VEC_RESULT1))
regCache_.ForceRelease(RegCache::VEC_RESULT1);
if (!success) {
regCache_.Reset(false);
@ -464,11 +511,6 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
return nullptr;
}
// TODO: Actually use these at some point.
regCache_.ForceRelease(RegCache::VEC_ARG_COLOR);
if (regCache_.Has(RegCache::GEN_ARG_LEVELFRAC))
regCache_.ForceRelease(RegCache::GEN_ARG_LEVELFRAC);
if (id.hasInvalidPtr) {
SetJumpTarget(zeroSrc);
}
@ -502,7 +544,7 @@ RegCache::Reg SamplerJitCache::GetZeroVec() {
return regCache_.Find(RegCache::VEC_ZERO);
}
bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::Reg destReg, bool level1) {
bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, bool level1) {
// First put the top RRRRRRRR LLLLLLLL into topReg, bottom into bottomReg.
// Start with XXXX XXXX RRRR LLLL, and then expand 8 bits to 16 bits.
X64Reg topReg = regCache_.Alloc(RegCache::VEC_TEMP0);
@ -521,8 +563,10 @@ bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::R
PMOVZXBW(topReg, R(quadReg));
PMOVZXBW(bottomReg, R(bottomReg));
}
regCache_.Unlock(quadReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
regCache_.ForceRelease(level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
if (!level1) {
regCache_.Unlock(quadReg, RegCache::VEC_RESULT);
regCache_.ForceRelease(RegCache::VEC_RESULT);
}
// Grab frac_u and spread to lower (L) lanes.
X64Reg fracReg = regCache_.Alloc(RegCache::VEC_TEMP2);
@ -577,24 +621,34 @@ bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::R
// Finally, time to sum them all up and divide by 256 to get back to 8 bits.
PADDUSW(bottomReg, R(topReg));
regCache_.Release(topReg, RegCache::VEC_TEMP0);
bool success = regCache_.ChangeReg(destReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
if (!success) {
_assert_msg_(destReg == bottomReg, "Unexpected other reg locked as destReg");
X64Reg otherReg = regCache_.Alloc(RegCache::VEC_TEMP0);
PSHUFD(otherReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
PADDUSW(bottomReg, R(otherReg));
regCache_.Release(otherReg, RegCache::VEC_TEMP0);
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
// Okay, now it can be changed.
regCache_.ChangeReg(destReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
bool changeSuccess = true;
if (level1) {
PSHUFD(quadReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
PADDUSW(quadReg, R(bottomReg));
PSRLW(quadReg, 8);
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
regCache_.Unlock(quadReg, RegCache::VEC_RESULT1);
} else {
PSHUFD(destReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
PADDUSW(destReg, R(bottomReg));
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
}
changeSuccess = regCache_.ChangeReg(XMM0, RegCache::VEC_RESULT);
if (!changeSuccess) {
_assert_msg_(XMM0 == bottomReg, "Unexpected other reg locked as destReg");
X64Reg otherReg = regCache_.Alloc(RegCache::VEC_TEMP0);
PSHUFD(otherReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
PADDUSW(bottomReg, R(otherReg));
regCache_.Release(otherReg, RegCache::VEC_TEMP0);
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
PSRLW(destReg, 8);
// Okay, now it can be changed.
regCache_.ChangeReg(XMM0, RegCache::VEC_RESULT);
} else {
PSHUFD(XMM0, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
PADDUSW(XMM0, R(bottomReg));
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
}
PSRLW(XMM0, 8);
}
return true;
}