diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp index f8ee947850..90b405b81b 100644 --- a/GPU/Common/VertexDecoderArm.cpp +++ b/GPU/Common/VertexDecoderArm.cpp @@ -572,7 +572,7 @@ void VertexDecoderJitCache::Jit_TcU16Through() { LDRH(tempReg2, srcReg, dec_->tcoff + 2); // TODO: Cleanup. - MOVP2R(scratchReg, &gstate_c.vertMinU); + MOVP2R(scratchReg, &gstate_c.vertBounds); auto updateSide = [&](ARMReg r, CCFlags cc, u32 off) { LDRH(tempReg3, scratchReg, off); @@ -583,10 +583,10 @@ void VertexDecoderJitCache::Jit_TcU16Through() { }; // TODO: Can this actually be fast? Hmm, floats aren't better. - updateSide(tempReg1, CC_LT, 0); - updateSide(tempReg1, CC_GT, 2); - updateSide(tempReg2, CC_LT, 4); - updateSide(tempReg2, CC_GT, 6); + updateSide(tempReg1, CC_LT, offsetof(KnownVertexBounds, minU)); + updateSide(tempReg1, CC_GT, offsetof(KnownVertexBounds, maxU)); + updateSide(tempReg2, CC_LT, offsetof(KnownVertexBounds, minV)); + updateSide(tempReg2, CC_GT, offsetof(KnownVertexBounds, maxV)); ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16)); STR(tempReg1, dstReg, dec_->decFmt.uvoff); diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index 1e0b926d86..c5a26a04a7 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -563,7 +563,7 @@ void VertexDecoderJitCache::Jit_TcU16Through() { LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2); // TODO: Cleanup. - MOVP2R(scratchReg64, &gstate_c.vertMinU); + MOVP2R(scratchReg64, &gstate_c.vertBounds); auto updateSide = [&](ARM64Reg r, CCFlags cc, u32 off) { LDRH(INDEX_UNSIGNED, tempReg3, scratchReg64, off); @@ -574,10 +574,10 @@ void VertexDecoderJitCache::Jit_TcU16Through() { }; // TODO: Can this actually be fast? Hmm, floats aren't better. 
- updateSide(tempReg1, CC_LT, 0); - updateSide(tempReg1, CC_GT, 2); - updateSide(tempReg2, CC_LT, 4); - updateSide(tempReg2, CC_GT, 6); + updateSide(tempReg1, CC_LT, offsetof(KnownVertexBounds, minU)); + updateSide(tempReg1, CC_GT, offsetof(KnownVertexBounds, maxU)); + updateSide(tempReg2, CC_LT, offsetof(KnownVertexBounds, minV)); + updateSide(tempReg2, CC_GT, offsetof(KnownVertexBounds, maxV)); ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16)); STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index b0d869c6a5..26342ebe6c 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -296,10 +296,10 @@ void VertexDecoder::Step_TcU16Through() const uv[0] = uvdata[0]; uv[1] = uvdata[1]; - gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]); - gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]); - gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]); - gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]); + gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, uvdata[0]); + gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, uvdata[0]); + gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, uvdata[1]); + gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, uvdata[1]); } void VertexDecoder::Step_TcU16ThroughDouble() const @@ -325,10 +325,10 @@ void VertexDecoder::Step_TcU16ThroughToFloat() const uv[0] = uvdata[0]; uv[1] = uvdata[1]; - gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]); - gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]); - gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]); - gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]); + gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, uvdata[0]); + gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, uvdata[0]); + gstate_c.vertBounds.minV = 
std::min(gstate_c.vertBounds.minV, uvdata[1]); + gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, uvdata[1]); } void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const @@ -354,10 +354,10 @@ void VertexDecoder::Step_TcFloatThrough() const uv[0] = uvdata[0]; uv[1] = uvdata[1]; - gstate_c.vertMinU = std::min(gstate_c.vertMinU, (u16)uvdata[0]); - gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, (u16)uvdata[0]); - gstate_c.vertMinV = std::min(gstate_c.vertMinV, (u16)uvdata[1]); - gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, (u16)uvdata[1]); + gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, (u16)uvdata[0]); + gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, (u16)uvdata[0]); + gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, (u16)uvdata[1]); + gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, (u16)uvdata[1]); } void VertexDecoder::Step_TcU8Prescale() const { diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp index 26023bf12f..d647058183 100644 --- a/GPU/Common/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -721,10 +721,10 @@ void VertexDecoderJitCache::Jit_TcU16Through() { }; // TODO: Can this actually be fast? Hmm, floats aren't better. 
- updateSide(tempReg1, CC_GE, &gstate_c.vertMinU); - updateSide(tempReg1, CC_LE, &gstate_c.vertMaxU); - updateSide(tempReg2, CC_GE, &gstate_c.vertMinV); - updateSide(tempReg2, CC_LE, &gstate_c.vertMaxV); + updateSide(tempReg1, CC_GE, &gstate_c.vertBounds.minU); + updateSide(tempReg1, CC_LE, &gstate_c.vertBounds.maxU); + updateSide(tempReg2, CC_GE, &gstate_c.vertBounds.minV); + updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV); } void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() { diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 208b7194b8..80a3818849 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -639,11 +639,11 @@ namespace DX9 { // If max is not > min, we probably could not detect it. Skip. // See the vertex decoder, where this is updated. - if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertMaxU > gstate_c.vertMinU) { - x = gstate_c.vertMinU; - y = gstate_c.vertMinV; - w = gstate_c.vertMaxU - x; - h = gstate_c.vertMaxV - y; + if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) { + x = gstate_c.vertBounds.minU; + y = gstate_c.vertBounds.minV; + w = gstate_c.vertBounds.maxU - x; + h = gstate_c.vertBounds.maxV - y; // If we bound a framebuffer, apply the byte offset as pixels to the copy too. if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) { diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index e1c921cd0c..d3ddcd9866 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -926,7 +926,7 @@ void TextureCacheDX9::ApplyTexture() { // Texture scale/offset and gen modes don't apply in through. // So we can optimize how much of the texture we look at. 
if (gstate.isModeThrough()) { - nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertMaxV); + nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertBounds.maxV); } else { // Otherwise, we need to reset to ensure we use the whole thing. // Can't tell how much is used. @@ -987,17 +987,17 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame }; // If min is not < max, then we don't have values (wasn't set during decode.) - if (gstate_c.vertMinV < gstate_c.vertMaxV) { + if (gstate_c.vertBounds.minV < gstate_c.vertBounds.maxV) { const float invWidth = 1.0f / (float)framebuffer->bufferWidth; const float invHeight = 1.0f / (float)framebuffer->bufferHeight; // Inverse of half = double. const float invHalfWidth = invWidth * 2.0f; const float invHalfHeight = invHeight * 2.0f; - const int u1 = gstate_c.vertMinU + gstate_c.curTextureXOffset; - const int v1 = gstate_c.vertMinV + gstate_c.curTextureYOffset; - const int u2 = gstate_c.vertMaxU + gstate_c.curTextureXOffset; - const int v2 = gstate_c.vertMaxV + gstate_c.curTextureYOffset; + const int u1 = gstate_c.vertBounds.minU + gstate_c.curTextureXOffset; + const int v1 = gstate_c.vertBounds.minV + gstate_c.curTextureYOffset; + const int u2 = gstate_c.vertBounds.maxU + gstate_c.curTextureXOffset; + const int v2 = gstate_c.vertBounds.maxV + gstate_c.curTextureYOffset; const float left = u1 * invHalfWidth - 1.0f + xoff; const float right = u2 * invHalfWidth - 1.0f + xoff; diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 896b4c5dfd..201168299a 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -695,7 +695,7 @@ void TransformDrawEngineDX9::DoFlush() { vai->numVerts = indexGen.PureCount(); } - _dbg_assert_msg_(G3D, gstate_c.vertMinV >= gstate_c.vertMaxV, "Should not have checked UVs when caching."); + _dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, 
"Should not have checked UVs when caching."); void * pVb; u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); @@ -888,10 +888,10 @@ rotateVBO: framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); // Now seems as good a time as any to reset the min/max coords, which we may examine later. - gstate_c.vertMinU = 512; - gstate_c.vertMinV = 512; - gstate_c.vertMaxU = 0; - gstate_c.vertMaxV = 0; + gstate_c.vertBounds.minU = 512; + gstate_c.vertBounds.minV = 512; + gstate_c.vertBounds.maxU = 0; + gstate_c.vertBounds.maxV = 0; host->GPUNotifyDraw(); } diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 41f198a183..87dd0d99f4 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -874,11 +874,11 @@ void FramebufferManager::BindFramebufferColor(int stage, u32 fbRawAddress, Virtu // If max is not > min, we probably could not detect it. Skip. // See the vertex decoder, where this is updated. - if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertMaxU > gstate_c.vertMinU) { - x = gstate_c.vertMinU; - y = gstate_c.vertMinV; - w = gstate_c.vertMaxU - x; - h = gstate_c.vertMaxV - y; + if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) { + x = gstate_c.vertBounds.minU; + y = gstate_c.vertBounds.minV; + w = gstate_c.vertBounds.maxU - x; + h = gstate_c.vertBounds.maxV - y; // If we bound a framebuffer, apply the byte offset as pixels to the copy too. if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) { diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 3604f49f9f..f6e138aaab 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -996,7 +996,7 @@ void TextureCache::ApplyTexture() { // Texture scale/offset and gen modes don't apply in through. // So we can optimize how much of the texture we look at. 
if (gstate.isModeThrough()) { - nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertMaxV); + nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertBounds.maxV); } else { // Otherwise, we need to reset to ensure we use the whole thing. // Can't tell how much is used. @@ -1053,17 +1053,17 @@ void TextureCache::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuf static const GLubyte indices[4] = { 0, 1, 3, 2 }; // If min is not < max, then we don't have values (wasn't set during decode.) - if (gstate_c.vertMinV < gstate_c.vertMaxV) { + if (gstate_c.vertBounds.minV < gstate_c.vertBounds.maxV) { const float invWidth = 1.0f / (float)framebuffer->bufferWidth; const float invHeight = 1.0f / (float)framebuffer->bufferHeight; // Inverse of half = double. const float invHalfWidth = invWidth * 2.0f; const float invHalfHeight = invHeight * 2.0f; - const int u1 = gstate_c.vertMinU + gstate_c.curTextureXOffset; - const int v1 = gstate_c.vertMinV + gstate_c.curTextureYOffset; - const int u2 = gstate_c.vertMaxU + gstate_c.curTextureXOffset; - const int v2 = gstate_c.vertMaxV + gstate_c.curTextureYOffset; + const int u1 = gstate_c.vertBounds.minU + gstate_c.curTextureXOffset; + const int v1 = gstate_c.vertBounds.minV + gstate_c.curTextureYOffset; + const int u2 = gstate_c.vertBounds.maxU + gstate_c.curTextureXOffset; + const int v2 = gstate_c.vertBounds.maxV + gstate_c.curTextureYOffset; const float left = u1 * invHalfWidth - 1.0f; const float right = u2 * invHalfWidth - 1.0f; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 17acfa18ae..0624ba4abc 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -684,7 +684,7 @@ void TransformDrawEngine::DoFlush() { vai->numVerts = indexGen.PureCount(); } - _dbg_assert_msg_(G3D, gstate_c.vertMinV >= gstate_c.vertMaxV, "Should not have checked UVs when caching."); + _dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= 
gstate_c.vertBounds.maxV, "Should not have checked UVs when caching."); vai->vbo = AllocateBuffer(); glstate.arrayBuffer.bind(vai->vbo); @@ -885,10 +885,10 @@ rotateVBO: framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); // Now seems as good a time as any to reset the min/max coords, which we may examine later. - gstate_c.vertMinU = 512; - gstate_c.vertMinV = 512; - gstate_c.vertMaxU = 0; - gstate_c.vertMaxV = 0; + gstate_c.vertBounds.minU = 512; + gstate_c.vertBounds.minV = 512; + gstate_c.vertBounds.maxU = 0; + gstate_c.vertBounds.maxV = 0; #ifndef MOBILE_DEVICE host->GPUNotifyDraw(); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index d18c245192..651c4994ce 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -464,6 +464,13 @@ enum { GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), }; +struct KnownVertexBounds { + u16 minU; + u16 minV; + u16 maxU; + u16 maxV; +}; + struct GPUStateCache { bool Supports(int flag) { return (featureFlags & flag) != 0; } @@ -507,10 +514,7 @@ struct GPUStateCache { float vpWidthScale; float vpHeightScale; - u16 vertMinU; - u16 vertMinV; - u16 vertMaxU; - u16 vertMaxV; + KnownVertexBounds vertBounds; // TODO: These should be accessed from the current VFB object directly. u32 curRTWidth;