mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
samplerjit: Lookup both mip tex values.
This commit is contained in:
parent
6b55d328e5
commit
940e6bb1d7
4 changed files with 94 additions and 37 deletions
|
@ -520,7 +520,6 @@ Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &sourc
|
|||
template <bool mayHaveMipLevels>
|
||||
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(Sampler::Funcs sampler, Vec4IntArg prim_color, float s, float t, int texlevel, int frac_texlevel, bool bilinear, u8 *texptr[], int texbufw[], int x, int y) {
|
||||
Vec4<int> texcolor0;
|
||||
Vec4<int> texcolor1;
|
||||
const u8 **tptr0 = const_cast<const u8 **>(&texptr[mayHaveMipLevels ? texlevel : 0]);
|
||||
const int *bufw0 = &texbufw[mayHaveMipLevels ? texlevel : 0];
|
||||
|
||||
|
@ -535,18 +534,13 @@ static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(Sampler::Funcs sampler,
|
|||
|
||||
texcolor0 = Vec4<int>(sampler.nearest(u[0], v[0], tptr0[0], bufw0[0], mayHaveMipLevels ? texlevel : 0));
|
||||
if (mayHaveMipLevels && frac_texlevel) {
|
||||
texcolor1 = Vec4<int>(sampler.nearest(u[1], v[1], tptr0[1], bufw0[1], texlevel + 1));
|
||||
Vec4<int> texcolor1 = Vec4<int>(sampler.nearest(u[1], v[1], tptr0[1], bufw0[1], texlevel + 1));
|
||||
texcolor0 = (texcolor1 * frac_texlevel + texcolor0 * (16 - frac_texlevel)) / 16;
|
||||
}
|
||||
} else {
|
||||
texcolor0 = Vec4<int>(sampler.linear(s, t, x, y, prim_color, tptr0, bufw0, mayHaveMipLevels ? texlevel : 0, mayHaveMipLevels ? frac_texlevel : 0));
|
||||
if (mayHaveMipLevels && frac_texlevel) {
|
||||
texcolor1 = Vec4<int>(sampler.linear(s, t, x, y, prim_color, tptr0 + 1, bufw0 + 1, texlevel + 1, 0));
|
||||
}
|
||||
}
|
||||
|
||||
if (mayHaveMipLevels && frac_texlevel) {
|
||||
texcolor0 = (texcolor1 * frac_texlevel + texcolor0 * (16 - frac_texlevel)) / 16;
|
||||
}
|
||||
return GetTextureFunctionOutput(prim_color, ToVec4IntArg(texcolor0));
|
||||
}
|
||||
|
||||
|
|
|
@ -557,7 +557,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, fl
|
|||
return ApplyTexelClampQuadT(gstate.isTexCoordClampedT(), base_v, height);
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 **tptr, const int *bufw, int texlevel, int levelFrac) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 **tptr, const int *bufw, int texlevel) {
|
||||
int frac_u, frac_v;
|
||||
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x);
|
||||
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y);
|
||||
|
@ -572,4 +572,13 @@ static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y,
|
|||
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) / (16 * 16));
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 **tptr, const int *bufw, int texlevel, int levelFrac) {
|
||||
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel);
|
||||
if (levelFrac) {
|
||||
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1);
|
||||
c0 = (c1 * levelFrac + c0 * (16 - levelFrac)) / 16;
|
||||
}
|
||||
return ToVec4IntResult(c0);
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -91,7 +91,7 @@ private:
|
|||
bool Jit_PrepareDataOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg);
|
||||
bool Jit_PrepareDataDirectOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, int bitsPerTexel);
|
||||
bool Jit_PrepareDataSwizzledOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, int bitsPerTexel);
|
||||
bool Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::Reg destReg, bool level1);
|
||||
bool Jit_BlendQuad(const SamplerID &id, bool level1);
|
||||
|
||||
#if PPSSPP_ARCH(ARM64)
|
||||
Arm64Gen::ARM64FloatEmitter fp;
|
||||
|
|
|
@ -265,6 +265,8 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
|||
regCache_.ChangeReg(XMM7, RegCache::VEC_V1);
|
||||
regCache_.ForceRetain(RegCache::VEC_U1);
|
||||
regCache_.ForceRetain(RegCache::VEC_V1);
|
||||
} else if (regCache_.Has(RegCache::GEN_ARG_LEVELFRAC)) {
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_LEVELFRAC);
|
||||
}
|
||||
|
||||
// Save prim color for later in a different XMM too.
|
||||
|
@ -441,8 +443,52 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
|||
if (regCache_.Has(RegCache::GEN_ARG_LEVEL))
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_LEVEL);
|
||||
|
||||
// TODO: Convert to reg cache.
|
||||
success = success && Jit_BlendQuad(id, XMM0, false);
|
||||
success = success && Jit_BlendQuad(id, false);
|
||||
if (id.hasAnyMips) {
|
||||
if (!regCache_.Has(RegCache::GEN_ARG_LEVELFRAC)) {
|
||||
X64Reg levelFracReg = regCache_.Alloc(RegCache::GEN_ARG_LEVELFRAC);
|
||||
MOVZX(32, 8, levelFracReg, MDisp(RSP, stackArgPos_ + 24));
|
||||
regCache_.Unlock(levelFracReg, RegCache::GEN_ARG_LEVELFRAC);
|
||||
regCache_.ForceRetain(RegCache::GEN_ARG_LEVELFRAC);
|
||||
}
|
||||
|
||||
X64Reg levelFracReg = regCache_.Find(RegCache::GEN_ARG_LEVELFRAC);
|
||||
CMP(8, R(levelFracReg), Imm8(0));
|
||||
FixupBranch skip = J_CC(CC_Z, true);
|
||||
|
||||
success = success && Jit_BlendQuad(id, true);
|
||||
|
||||
// First, broadcast the levelFrac value into an XMM.
|
||||
X64Reg fracReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
MOVD_xmm(fracReg, R(levelFracReg));
|
||||
PSHUFLW(fracReg, R(fracReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(levelFracReg, RegCache::GEN_ARG_LEVELFRAC);
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_LEVELFRAC);
|
||||
|
||||
// Multiply level1 color by the fraction.
|
||||
X64Reg color1Reg = regCache_.Find(RegCache::VEC_RESULT1);
|
||||
PMULLW(color1Reg, R(fracReg));
|
||||
|
||||
// Okay, next we need an inverse for color 0.
|
||||
X64Reg invFracReg = regCache_.Alloc(RegCache::VEC_TEMP1);
|
||||
MOVDQA(invFracReg, M(const10All_));
|
||||
PSUBW(invFracReg, R(fracReg));
|
||||
|
||||
// And multiply.
|
||||
PMULLW(XMM0, R(invFracReg));
|
||||
regCache_.Release(fracReg, RegCache::VEC_TEMP0);
|
||||
regCache_.Release(invFracReg, RegCache::VEC_TEMP1);
|
||||
|
||||
// Okay, now sum and divide by 16 (which is what the fraction maxed at.)
|
||||
PADDW(XMM0, R(color1Reg));
|
||||
PSRLW(XMM0, 4);
|
||||
|
||||
// And now we're done with color1Reg/VEC_RESULT1.
|
||||
regCache_.Unlock(color1Reg, RegCache::VEC_RESULT1);
|
||||
regCache_.ForceRelease(RegCache::VEC_RESULT1);
|
||||
|
||||
SetJumpTarget(skip);
|
||||
}
|
||||
|
||||
// Last of all, convert to 32-bit channels.
|
||||
if (cpu_info.bSSE4_1) {
|
||||
|
@ -453,9 +499,10 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
|||
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
|
||||
}
|
||||
|
||||
// TODO: Actually use these at some point.
|
||||
regCache_.ForceRelease(RegCache::VEC_ARG_COLOR);
|
||||
|
||||
regCache_.ForceRelease(RegCache::VEC_RESULT);
|
||||
if (regCache_.Has(RegCache::VEC_RESULT1))
|
||||
regCache_.ForceRelease(RegCache::VEC_RESULT1);
|
||||
|
||||
if (!success) {
|
||||
regCache_.Reset(false);
|
||||
|
@ -464,11 +511,6 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// TODO: Actually use these at some point.
|
||||
regCache_.ForceRelease(RegCache::VEC_ARG_COLOR);
|
||||
if (regCache_.Has(RegCache::GEN_ARG_LEVELFRAC))
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_LEVELFRAC);
|
||||
|
||||
if (id.hasInvalidPtr) {
|
||||
SetJumpTarget(zeroSrc);
|
||||
}
|
||||
|
@ -502,7 +544,7 @@ RegCache::Reg SamplerJitCache::GetZeroVec() {
|
|||
return regCache_.Find(RegCache::VEC_ZERO);
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::Reg destReg, bool level1) {
|
||||
bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, bool level1) {
|
||||
// First put the top RRRRRRRR LLLLLLLL into topReg, bottom into bottomReg.
|
||||
// Start with XXXX XXXX RRRR LLLL, and then expand 8 bits to 16 bits.
|
||||
X64Reg topReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
|
@ -521,8 +563,10 @@ bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::R
|
|||
PMOVZXBW(topReg, R(quadReg));
|
||||
PMOVZXBW(bottomReg, R(bottomReg));
|
||||
}
|
||||
regCache_.Unlock(quadReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
|
||||
regCache_.ForceRelease(level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
|
||||
if (!level1) {
|
||||
regCache_.Unlock(quadReg, RegCache::VEC_RESULT);
|
||||
regCache_.ForceRelease(RegCache::VEC_RESULT);
|
||||
}
|
||||
|
||||
// Grab frac_u and spread to lower (L) lanes.
|
||||
X64Reg fracReg = regCache_.Alloc(RegCache::VEC_TEMP2);
|
||||
|
@ -577,24 +621,34 @@ bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, Rasterizer::RegCache::R
|
|||
// Finally, time to sum them all up and divide by 256 to get back to 8 bits.
|
||||
PADDUSW(bottomReg, R(topReg));
|
||||
regCache_.Release(topReg, RegCache::VEC_TEMP0);
|
||||
bool success = regCache_.ChangeReg(destReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
|
||||
if (!success) {
|
||||
_assert_msg_(destReg == bottomReg, "Unexpected other reg locked as destReg");
|
||||
X64Reg otherReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
PSHUFD(otherReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
|
||||
PADDUSW(bottomReg, R(otherReg));
|
||||
regCache_.Release(otherReg, RegCache::VEC_TEMP0);
|
||||
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
|
||||
|
||||
// Okay, now it can be changed.
|
||||
regCache_.ChangeReg(destReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
|
||||
bool changeSuccess = true;
|
||||
if (level1) {
|
||||
PSHUFD(quadReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
|
||||
PADDUSW(quadReg, R(bottomReg));
|
||||
PSRLW(quadReg, 8);
|
||||
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
|
||||
regCache_.Unlock(quadReg, RegCache::VEC_RESULT1);
|
||||
} else {
|
||||
PSHUFD(destReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
|
||||
PADDUSW(destReg, R(bottomReg));
|
||||
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
|
||||
}
|
||||
changeSuccess = regCache_.ChangeReg(XMM0, RegCache::VEC_RESULT);
|
||||
if (!changeSuccess) {
|
||||
_assert_msg_(XMM0 == bottomReg, "Unexpected other reg locked as destReg");
|
||||
X64Reg otherReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
PSHUFD(otherReg, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
|
||||
PADDUSW(bottomReg, R(otherReg));
|
||||
regCache_.Release(otherReg, RegCache::VEC_TEMP0);
|
||||
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
|
||||
|
||||
PSRLW(destReg, 8);
|
||||
// Okay, now it can be changed.
|
||||
regCache_.ChangeReg(XMM0, RegCache::VEC_RESULT);
|
||||
} else {
|
||||
PSHUFD(XMM0, R(bottomReg), _MM_SHUFFLE(3, 2, 3, 2));
|
||||
PADDUSW(XMM0, R(bottomReg));
|
||||
regCache_.Release(bottomReg, RegCache::VEC_TEMP1);
|
||||
}
|
||||
|
||||
PSRLW(XMM0, 8);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue