From 72a73045bbbd4f7f2fc12af94b069eea4aa0595a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 19 Jul 2015 15:59:10 -0700 Subject: [PATCH] Add uv range to arm64 and non-jit decoders. --- GPU/Common/VertexDecoderArm64.cpp | 22 +++++++++++++++++++++- GPU/Common/VertexDecoderCommon.cpp | 16 ++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index fa2e05677c..30b1a1d147 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -554,7 +554,27 @@ void VertexDecoderJitCache::Jit_TcU16() { } void VertexDecoderJitCache::Jit_TcU16Through() { - LDUR(tempReg1, srcReg, dec_->tcoff); + LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff); + LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2); + + // TODO: Cleanup. + MOVP2R(scratchReg64, &gstate_c.vertMinU); + + auto updateSide = [&](ARM64Reg r, CCFlags cc, u32 off) { + LDRH(INDEX_UNSIGNED, tempReg3, scratchReg64, off); + CMP(r, tempReg3); + FixupBranch skip = B(InvertCond(cc)); + STRH(INDEX_UNSIGNED, r, scratchReg64, off); + SetJumpTarget(skip); + }; + + // TODO: Can this actually be fast? Hmm, floats aren't better. + updateSide(tempReg1, CC_LT, 0); + updateSide(tempReg1, CC_GT, 2); + updateSide(tempReg2, CC_LT, 4); + updateSide(tempReg2, CC_GT, 6); + + ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16)); STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); } diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index 6e36c8830b..b0d869c6a5 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -15,6 +15,7 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+#include <algorithm> #include <stdio.h> #include "base/basictypes.h" @@ -294,6 +295,11 @@ void VertexDecoder::Step_TcU16Through() const const u16 *uvdata = (const u16_le*)(ptr_ + tcoff); uv[0] = uvdata[0]; uv[1] = uvdata[1]; + + gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]); + gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]); + gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]); + gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]); } void VertexDecoder::Step_TcU16ThroughDouble() const @@ -318,6 +324,11 @@ void VertexDecoder::Step_TcU16ThroughToFloat() const const u16 *uvdata = (const u16_le*)(ptr_ + tcoff); uv[0] = uvdata[0]; uv[1] = uvdata[1]; + + gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]); + gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]); + gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]); + gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]); } void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const @@ -342,6 +353,11 @@ void VertexDecoder::Step_TcFloatThrough() const const float *uvdata = (const float*)(ptr_ + tcoff); uv[0] = uvdata[0]; uv[1] = uvdata[1]; + + gstate_c.vertMinU = std::min(gstate_c.vertMinU, (u16)uvdata[0]); + gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, (u16)uvdata[0]); + gstate_c.vertMinV = std::min(gstate_c.vertMinV, (u16)uvdata[1]); + gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, (u16)uvdata[1]); } void VertexDecoder::Step_TcU8Prescale() const {