From 257f8dbbc64deca2da26ab3f7ebd892001cbacc7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 6 May 2017 18:55:16 -0700 Subject: [PATCH] GPU: Remove now-unused vertex decoder funcs. We always convert to float now, so these functions are no longer used. --- GPU/Common/VertexDecoderArm.cpp | 44 --------------- GPU/Common/VertexDecoderArm64.cpp | 37 ------------- GPU/Common/VertexDecoderCommon.cpp | 88 ------------------------------ GPU/Common/VertexDecoderCommon.h | 10 ---- GPU/Common/VertexDecoderX86.cpp | 42 -------------- 5 files changed, 221 deletions(-) diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp index fcaf3e4f27..3f9267cab3 100644 --- a/GPU/Common/VertexDecoderArm.cpp +++ b/GPU/Common/VertexDecoderArm.cpp @@ -121,15 +121,12 @@ static const JitLookup jitLookup[] = { {&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat}, {&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat}, {&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat}, - {&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double}, {&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale}, {&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale}, {&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale}, - {&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through}, {&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough}, - {&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble}, // {&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat}, {&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8}, @@ -571,31 +568,6 @@ void VertexDecoderJitCache::Jit_TcFloat() { STR(tempReg2, dstReg, dec_->decFmt.uvoff + 4); } -void VertexDecoderJitCache::Jit_TcU16Through() { - LDRH(tempReg1, srcReg, dec_->tcoff); - LDRH(tempReg2, srcReg, dec_->tcoff + 2); - - // TODO: Cleanup. - MOVP2R(scratchReg, &gstate_c.vertBounds.minU); - - auto updateSide = [&](ARMReg r, CCFlags cc, u32 off) { - LDRH(tempReg3, scratchReg, off); - CMP(r, tempReg3); - SetCC(cc); - STRH(r, scratchReg, off); - SetCC(CC_AL); - }; - - // TODO: Can this actually be fast? Hmm, floats aren't better. - updateSide(tempReg1, CC_LT, offsetof(KnownVertexBounds, minU)); - updateSide(tempReg1, CC_GT, offsetof(KnownVertexBounds, maxU)); - updateSide(tempReg2, CC_LT, offsetof(KnownVertexBounds, minV)); - updateSide(tempReg2, CC_GT, offsetof(KnownVertexBounds, maxV)); - - ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16)); - STR(tempReg1, dstReg, dec_->decFmt.uvoff); -} - void VertexDecoderJitCache::Jit_TcFloatThrough() { LDR(tempReg1, srcReg, dec_->tcoff); LDR(tempReg2, srcReg, dec_->tcoff + 4); @@ -603,22 +575,6 @@ void VertexDecoderJitCache::Jit_TcFloatThrough() { STR(tempReg2, dstReg, dec_->decFmt.uvoff + 4); } -void VertexDecoderJitCache::Jit_TcU16Double() { - LDRH(tempReg1, srcReg, dec_->tcoff); - LDRH(tempReg2, srcReg, dec_->tcoff + 2); - LSL(tempReg1, tempReg1, 1); - ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 17)); - STR(tempReg1, dstReg, dec_->decFmt.uvoff); -} - -void VertexDecoderJitCache::Jit_TcU16ThroughDouble() { - LDRH(tempReg1, srcReg, dec_->tcoff); - LDRH(tempReg2, srcReg, dec_->tcoff + 2); - LSL(tempReg1, tempReg1, 1); - ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 17)); - STR(tempReg1, dstReg, dec_->decFmt.uvoff); -} - void VertexDecoderJitCache::Jit_TcU8Prescale() { if (cpu_info.bNEON) { // TODO: Needs testing diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index c7e737428b..f68f100457 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -95,15 +95,12 @@ static const JitLookup jitLookup[] = { {&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat}, {&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat}, {&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat}, - {&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double}, {&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale}, {&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale}, {&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale}, - {&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through}, {&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough}, - {&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble}, // {&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat}, {&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8}, @@ -582,45 +579,11 @@ void VertexDecoderJitCache::Jit_Color5551() { CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ); } -void VertexDecoderJitCache::Jit_TcU16Through() { - LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff); - LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2); - - auto updateSide = [&](ARM64Reg src, CCFlags cc, ARM64Reg dst) { - CMP(src, dst); - CSEL(dst, src, dst, cc); - }; - - updateSide(tempReg1, CC_LT, boundsMinUReg); - updateSide(tempReg1, CC_GT, boundsMaxUReg); - updateSide(tempReg2, CC_LT, boundsMinVReg); - updateSide(tempReg2, CC_GT, boundsMaxVReg); - - ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16)); - STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); -} - void VertexDecoderJitCache::Jit_TcFloatThrough() { LDP(INDEX_SIGNED, tempReg1, tempReg2, srcReg, dec_->tcoff); STP(INDEX_SIGNED, tempReg1, tempReg2, dstReg, dec_->decFmt.uvoff); } -void VertexDecoderJitCache::Jit_TcU16Double() { - LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff); - LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2); - LSL(tempReg1, tempReg1, 1); - ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 17)); - STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); -} - -void VertexDecoderJitCache::Jit_TcU16ThroughDouble() { - LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff); - LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2); - LSL(tempReg1, tempReg1, 1); - ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 17)); - STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); -} - void VertexDecoderJitCache::Jit_TcFloat() { LDP(INDEX_SIGNED, tempReg1, tempReg2, srcReg, dec_->tcoff); STP(INDEX_SIGNED, tempReg1, tempReg2, dstReg, dec_->decFmt.uvoff); diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index 6cb48e9d42..63f33f845c 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -281,35 +281,6 @@ void VertexDecoder::Step_TcU16ToFloat() const uv[1] = uvdata[1] * (1.0f / 32768.0f); } -void VertexDecoder::Step_TcU16Double() const -{ - u16 *uv = (u16*)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16_le*)(ptr_ + tcoff); - uv[0] = uvdata[0] * 2; - uv[1] = uvdata[1] * 2; -} - -void VertexDecoder::Step_TcU16Through() const -{ - u16 *uv = (u16 *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16_le*)(ptr_ + tcoff); - uv[0] = uvdata[0]; - uv[1] = uvdata[1]; - - gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, uvdata[0]); - gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, uvdata[0]); - gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, uvdata[1]); - gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, uvdata[1]); -} - -void VertexDecoder::Step_TcU16ThroughDouble() const -{ - u16 *uv = (u16 *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16_le*)(ptr_ + tcoff); - uv[0] = uvdata[0] * 2; - uv[1] = uvdata[1] * 2; -} - void VertexDecoder::Step_TcU16DoubleToFloat() const { float *uv = (float*)(decoded_ + decFmt.uvoff); @@ -388,51 +359,6 @@ void VertexDecoder::Step_TcFloatPrescale() const { uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff; } -void VertexDecoder::Step_TcU8Morph() const { - float uv[2] = { 0, 0 }; - for (int n = 0; n < morphcount; n++) { - float w = gstate_c.morphWeights[n]; - const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff); - - uv[0] += (float)uvdata[0] * w; - uv[1] += (float)uvdata[1] * w; - } - - u8 *out = decoded_ + decFmt.uvoff; - out[0] = (int)uv[0]; - out[1] = (int)uv[1]; -} - -void VertexDecoder::Step_TcU16Morph() const { - float uv[2] = { 0, 0 }; - for (int n = 0; n < morphcount; n++) { - float w = gstate_c.morphWeights[n]; - const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff); - - uv[0] += (float)uvdata[0] * w; - uv[1] += (float)uvdata[1] * w; - } - - u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff); - out[0] = (int)uv[0]; - out[1] = (int)uv[1]; -} - -void VertexDecoder::Step_TcU16DoubleMorph() const { - float uv[2] = { 0, 0 }; - for (int n = 0; n < morphcount; n++) { - float w = gstate_c.morphWeights[n]; - const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff); - - uv[0] += (float)uvdata[0] * w; - uv[1] += (float)uvdata[1] * w; - } - - u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff); - out[0] = (int)(uv[0] * 2.0f); - out[1] = (int)(uv[1] * 2.0f); -} - void VertexDecoder::Step_TcU8MorphToFloat() const { float uv[2] = { 0, 0 }; for (int n = 0; n < morphcount; n++) { @@ -922,20 +848,6 @@ static const StepFunction tcstep_prescale_morph_remaster[4] = { &VertexDecoder::Step_TcFloatPrescaleMorph, }; -static const StepFunction tcstep_morph[4] = { - 0, - &VertexDecoder::Step_TcU8Morph, - &VertexDecoder::Step_TcU16Morph, - &VertexDecoder::Step_TcFloatMorph, -}; - -static const StepFunction tcstep_morph_remaster[4] = { - 0, - &VertexDecoder::Step_TcU8Morph, - &VertexDecoder::Step_TcU16DoubleMorph, - &VertexDecoder::Step_TcFloatMorph, -}; - static const StepFunction tcstep_morphToFloat[4] = { 0, &VertexDecoder::Step_TcU8MorphToFloat, diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 1531822e02..194bbc5ed5 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -514,17 +514,11 @@ public: void Step_TcU16DoublePrescale() const; void Step_TcFloatPrescale() const; - void Step_TcU16Double() const; - void Step_TcU16Through() const; - void Step_TcU16ThroughDouble() const; void Step_TcU16DoubleToFloat() const; void Step_TcU16ThroughToFloat() const; void Step_TcU16ThroughDoubleToFloat() const; void Step_TcFloatThrough() const; - void Step_TcU8Morph() const; - void Step_TcU16Morph() const; - void Step_TcU16DoubleMorph() const; void Step_TcU8MorphToFloat() const; void Step_TcU16MorphToFloat() const; void Step_TcU16DoubleMorphToFloat() const; @@ -675,10 +669,6 @@ public: void Jit_TcU16PrescaleMorph(); void Jit_TcFloatPrescaleMorph(); - void Jit_TcU16Double(); - void Jit_TcU16ThroughDouble(); - - void Jit_TcU16Through(); void Jit_TcU16ThroughToFloat(); void Jit_TcFloatThrough(); diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp index 0db6364af2..91ae79e276 100644 --- a/GPU/Common/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -100,16 +100,13 @@ static const JitLookup jitLookup[] = { {&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat}, {&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat}, {&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat}, - {&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double}, {&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale}, {&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale}, {&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale}, - {&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through}, {&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat}, {&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough}, - {&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble}, {&VertexDecoder::Step_TcU8MorphToFloat, &VertexDecoderJitCache::Jit_TcU8MorphToFloat}, {&VertexDecoder::Step_TcU16MorphToFloat, &VertexDecoderJitCache::Jit_TcU16MorphToFloat}, @@ -696,15 +693,6 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() { MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3); } -void VertexDecoderJitCache::Jit_TcU16Double() { - MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff)); - MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->tcoff + 2)); - SHL(16, R(tempReg1), Imm8(1)); // 16 to get a wall to shift into - SHL(32, R(tempReg2), Imm8(17)); - OR(32, R(tempReg1), R(tempReg2)); - MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); -} - void VertexDecoderJitCache::Jit_TcFloat() { #ifdef _M_X64 MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff)); @@ -851,27 +839,6 @@ void VertexDecoderJitCache::Jit_TcFloatPrescaleMorph() { MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg); } -void VertexDecoderJitCache::Jit_TcU16Through() { - MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff)); - MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); - - MOV(32, R(tempReg2), R(tempReg1)); - SHR(32, R(tempReg2), Imm8(16)); - - auto updateSide = [&](X64Reg r, CCFlags skipCC, u16 *value) { - CMP(16, R(r), M(value)); - FixupBranch skip = J_CC(skipCC); - MOV(16, M(value), R(r)); - SetJumpTarget(skip); - }; - - // TODO: Can this actually be fast? Hmm, floats aren't better. - updateSide(tempReg1, CC_GE, &gstate_c.vertBounds.minU); - updateSide(tempReg1, CC_LE, &gstate_c.vertBounds.maxU); - updateSide(tempReg2, CC_GE, &gstate_c.vertBounds.minV); - updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV); -} - void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() { PXOR(fpScratchReg2, R(fpScratchReg2)); MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff)); @@ -897,15 +864,6 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() { updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV); } -void VertexDecoderJitCache::Jit_TcU16ThroughDouble() { - MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff)); - MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->tcoff + 2)); - SHL(16, R(tempReg1), Imm8(1)); // 16 to get a wall to shift into - SHL(32, R(tempReg2), Imm8(17)); - OR(32, R(tempReg1), R(tempReg2)); - MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); -} - void VertexDecoderJitCache::Jit_TcFloatThrough() { #ifdef _M_X64 MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));