softgpu: Remove incorrect offsetting for X/Y.

This commit is contained in:
Unknown W. Brackets 2022-02-20 09:13:20 -08:00
parent 1bc3acf2ed
commit a88c9a0680
4 changed files with 18 additions and 46 deletions

View file

@ -418,8 +418,8 @@ static inline void GetTexelCoordinates(int level, float s, float t, int &out_u,
int width = samplerID.cached.sizes[level].w;
int height = samplerID.cached.sizes[level].h;
int base_u = (int)(s * width * 256.0f) + 12 - x;
int base_v = (int)(t * height * 256.0f) + 12 - y;
int base_u = (int)(s * width * 256.0f);
int base_v = (int)(t * height * 256.0f);
base_u >>= 8;
base_v >>= 8;
@ -610,7 +610,7 @@ static inline Vec4IntResult SOFTRAST_CALL ApplyTexelClampQuadT(bool clamp, int v
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, float in_s, int &frac_u, int x, const SamplerID &samplerID) {
int width = samplerID.cached.sizes[level].w;
int base_u = (int)(in_s * width * 256) + 12 - x - 128;
int base_u = (int)(in_s * width * 256) - 128;
frac_u = (int)(base_u >> 4) & 0x0F;
base_u >>= 8;
@ -621,7 +621,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, fl
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, float in_t, int &frac_v, int y, const SamplerID &samplerID) {
int height = samplerID.cached.sizes[level].h;
int base_v = (int)(in_t * height * 256) + 12 - y - 128;
int base_v = (int)(in_t * height * 256) - 128;
frac_v = (int)(base_v >> 4) & 0x0F;
base_v >>= 8;

View file

@ -2582,22 +2582,8 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) {
Describe("Texel");
// First, adjust X and Y...
X64Reg xReg = regCache_.Find(RegCache::GEN_ARG_X);
X64Reg yReg = regCache_.Find(RegCache::GEN_ARG_Y);
NEG(32, R(xReg));
ADD(32, R(xReg), Imm8(12));
NEG(32, R(yReg));
ADD(32, R(yReg), Imm8(12));
X64Reg tempXYReg = regCache_.Alloc(RegCache::VEC_TEMP5);
SHL(64, R(yReg), Imm8(32));
OR(64, R(xReg), R(yReg));
MOVQ_xmm(tempXYReg, R(xReg));
if (id.hasAnyMips)
PSHUFD(tempXYReg, R(tempXYReg), _MM_SHUFFLE(1, 0, 1, 0));
regCache_.Unlock(xReg, RegCache::GEN_ARG_X);
// TODO: Shouldn't do this in the sampler, need to get s/t right.
regCache_.ForceRelease(RegCache::GEN_ARG_X);
regCache_.Unlock(yReg, RegCache::GEN_ARG_Y);
regCache_.ForceRelease(RegCache::GEN_ARG_Y);
X64Reg uReg = regCache_.Alloc(RegCache::GEN_ARG_U);
@ -2640,12 +2626,11 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) {
CVTTPS2DQ(sReg, R(sReg));
regCache_.Release(sizesReg, RegCache::VEC_TEMP0);
PADDD(sReg, R(tempXYReg));
PSRLD(sReg, 8);
// Reuse tempXYReg for the level1 values.
if (!cpu_info.bSSE4_1)
PSHUFD(tempXYReg, R(sReg), _MM_SHUFFLE(3, 2, 3, 2));
PSHUFD(tReg, R(sReg), _MM_SHUFFLE(3, 2, 3, 2));
auto applyClampWrap = [&](X64Reg dest, bool clamp, bool isY, bool isLevel1) {
int offset = offsetof(SamplerID, cached.sizes[0].w) + (isY ? 2 : 0) + (isLevel1 ? 4 : 0);
@ -2659,7 +2644,7 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) {
else
MOVD_xmm(R(dest), sReg);
} else {
X64Reg srcReg = isLevel1 ? tempXYReg : sReg;
X64Reg srcReg = isLevel1 ? tReg : sReg;
MOVD_xmm(R(dest), srcReg);
if (!isY)
PSRLDQ(srcReg, 4);
@ -2699,8 +2684,7 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) {
UNPCKLPS(sReg, R(tReg));
MULPS(sReg, M(constWidthHeight256f_));
CVTTPS2DQ(sReg, R(sReg));
// Add the X/Y offsets, then shift out the fraction.
PADDD(sReg, R(tempXYReg));
// Great, shift out the fraction.
PSRLD(sReg, 8);
// Square textures are kinda common.
@ -2758,8 +2742,6 @@ bool SamplerJitCache::Jit_GetTexelCoords(const SamplerID &id) {
regCache_.ForceRelease(RegCache::VEC_ARG_S);
regCache_.ForceRelease(RegCache::VEC_ARG_T);
regCache_.Release(tempXYReg, RegCache::VEC_TEMP5);
return true;
}
@ -2830,26 +2812,12 @@ bool SamplerJitCache::Jit_GetTexelCoordsQuad(const SamplerID &id) {
CVTPS2DQ(sReg, R(sReg));
// Now adjust X and Y...
// TODO: Could we cache this? Should only vary on offset, maybe?
X64Reg xReg = regCache_.Find(RegCache::GEN_ARG_X);
X64Reg yReg = regCache_.Find(RegCache::GEN_ARG_Y);
NEG(32, R(xReg));
SUB(32, R(xReg), Imm8(128 - 12));
NEG(32, R(yReg));
SUB(32, R(yReg), Imm8(128 - 12));
SHL(64, R(yReg), Imm8(32));
OR(64, R(xReg), R(yReg));
// Add them in. We do this in the SSE because we have more to do there...
X64Reg tempXYReg = regCache_.Alloc(RegCache::VEC_TEMP0);
MOVQ_xmm(tempXYReg, R(xReg));
if (id.hasAnyMips)
PSHUFD(tempXYReg, R(tempXYReg), _MM_SHUFFLE(1, 0, 1, 0));
// Produce a -128 constant (all bits set, then shifted left by 7).
PCMPEQD(tempXYReg, R(tempXYReg));
PSLLD(tempXYReg, 7);
PADDD(sReg, R(tempXYReg));
regCache_.Release(tempXYReg, RegCache::VEC_TEMP0);
regCache_.Unlock(xReg, RegCache::GEN_ARG_X);
regCache_.Unlock(yReg, RegCache::GEN_ARG_Y);
regCache_.ForceRelease(RegCache::GEN_ARG_X);
regCache_.ForceRelease(RegCache::GEN_ARG_Y);

@ -1 +1 @@
Subproject commit 8b5da5710a965d48f685e7c9dbfaeeae32d4c4d8
Subproject commit 682a4303aba63a50c91ae0fa6928c9dac8ca9b92

View file

@ -150,9 +150,7 @@ tests_good = [
"gpu/commands/material",
"gpu/displaylist/alignment",
"gpu/dither/dither",
"gpu/filtering/linear",
"gpu/filtering/mipmaplinear",
"gpu/filtering/nearest",
"gpu/ge/break",
"gpu/ge/context",
"gpu/ge/edram",
@ -387,6 +385,12 @@ tests_next = [
"gpu/complex/complex",
"gpu/depth/precision",
"gpu/displaylist/state",
"gpu/filtering/linear",
"gpu/filtering/nearest",
"gpu/filtering/precisionlinear2d",
"gpu/filtering/precisionlinear3d",
"gpu/filtering/precisionnearest2d",
"gpu/filtering/precisionnearest3d",
"gpu/ge/get",
"gpu/primitives/bezier",
"gpu/primitives/continue",