diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index c16e734313..de62a5f933 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -15,6 +15,7 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include #include #include "math/math_util.h" #include "gfx_es2/gpu_features.h" @@ -156,6 +157,7 @@ static int ColorIndexOffset(int prim, GEShadeMode shadeMode, bool clearMode) { } // NOTE: The viewport must be up to date! +// Also, this assumes SetTexture() has already figured out the actual texture height. void SoftwareTransform( int prim, int vertexCount, u32 vertType, u16 *&inds, int indexType, const DecVtxFormat &decVtxFormat, int &maxIndex, TransformedVertex *&drawBuffer, int &numTrans, bool &drawIndexed, const SoftwareTransformParams *params, SoftwareTransformResult *result) { @@ -471,43 +473,50 @@ void SoftwareTransform( } } - // This means we're using a framebuffer (and one that isn't big enough.) + // Breath of Fire 3 does some interesting rendering here, probably from being a port. + // It draws at 384x240 to two buffers in VRAM, one right after the other. + // We end up creating separate framebuffers, and rendering to each. + // But the game then stretches this to the screen - and reads from a single 512 tall texture. + // We initially use the first framebuffer. This code detects the read from the second. + // + // First Vs: 12, 228 - second Vs: 252, 468 - estimated fb height: 272 + + // If curTextureHeight is < h, it must be a framebuffer that wasn't full height. if (gstate_c.curTextureHeight < (u32)h && maxIndex >= 2) { - // Even if not rectangles, this will detect if either of the first two are outside the framebuffer. - // HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines, - // while still keeping BOF working (see below). 
+ // This is the max V that will still land within the framebuffer (since it's shorter.) + // We already adjusted V to the framebuffer above. + const float maxAvailableV = 1.0f; + // This is the max V that would've been inside the original texture size. + const float maxValidV = heightFactor; + + // Apparently, Assassin's Creed: Bloodlines accesses just outside. const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel. - bool tlOutside; - bool tlAlmostOutside; - bool brOutside; - // If we're outside heightFactor, then v must be wrapping or clamping. Avoid this workaround. - // If we're <= 1.0f, we're inside the framebuffer (workaround not needed.) - // We buffer that 1.0f a little more with a texel to avoid some false positives. - tlOutside = transformed[0].v <= heightFactor && transformed[0].v > 1.0f + invTexH; - brOutside = transformed[1].v <= heightFactor && transformed[1].v > 1.0f + invTexH; - // Careful: if br is outside, but tl is well inside, this workaround still doesn't make sense. - // We go with halfway, since we overestimate framebuffer heights sometimes but not by much. - tlAlmostOutside = transformed[0].v <= heightFactor && transformed[0].v >= 0.5f; + + // Are either TL or BR inside the texture but outside the framebuffer? + const bool tlOutside = transformed[0].v > maxAvailableV + invTexH && transformed[0].v <= maxValidV; + const bool brOutside = transformed[1].v > maxAvailableV + invTexH && transformed[1].v <= maxValidV; + + // If TL isn't outside, is it at least near the end? + const bool tlAlmostOutside = transformed[0].v > maxAvailableV * 0.5f && transformed[0].v <= maxValidV; + if (tlOutside || (brOutside && tlAlmostOutside)) { - // Okay, so we're texturing from outside the framebuffer, but inside the texture height. - // Breath of Fire 3 does this to access a render surface at an offset. - const u32 bpp = fbman->GetTargetFormat() == GE_FORMAT_8888 ? 
4 : 2; - const u32 prevH = texCache->AttachedDrawingHeight(); - const u32 fb_size = bpp * fbman->GetTargetStride() * prevH; - const u32 prevYOffset = gstate_c.curTextureYOffset; - if (texCache->SetOffsetTexture(fb_size)) { + // This is how far the nearest coord is, so that's where we'll look for the next framebuf. + const u32 yOffset = (int)(gstate_c.curTextureHeight * std::min(transformed[0].v, transformed[1].v)); + if (texCache->SetOffsetTexture(yOffset)) { const float oldWidthFactor = widthFactor; const float oldHeightFactor = heightFactor; - widthFactor = (float) w / (float) gstate_c.curTextureWidth; - heightFactor = (float) h / (float) gstate_c.curTextureHeight; + widthFactor = (float)w / (float)gstate_c.curTextureWidth; + heightFactor = (float)h / (float)gstate_c.curTextureHeight; - // We've already baked in the old gstate_c.curTextureYOffset, so correct. - const float yDiff = (float) (prevH + prevYOffset - gstate_c.curTextureYOffset) / (float) h; + // We need to subtract this offset from the Vs to address the new framebuf. + // Note: SetOffsetTexture() will account for any overshoot in yOffset. + const float yDiff = (float)yOffset / (float)h; for (int index = 0; index < maxIndex; ++index) { transformed[index].u *= widthFactor / oldWidthFactor; - // Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO. 
transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor; } + + result->textureChanged = true; } } } diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index a7efc396c9..bdc63deda2 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -35,6 +35,7 @@ struct SoftwareTransformResult { float depth; bool setStencil; + bool textureChanged; u8 stencilValue; }; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 50964d88e5..ed3f904a67 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -29,8 +29,9 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Debugger/Debugger.h" -#include "GPU/GPUState.h" +#include "GPU/GPUCommon.h" #include "GPU/GPUInterface.h" +#include "GPU/GPUState.h" #if defined(_M_SSE) #include @@ -750,11 +751,14 @@ void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, Vi } } -bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset) { +bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 yOffset) { static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32; AttachedFramebufferInfo fbInfo = { 0 }; + const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 
4 : 2; + const u32 texaddrOffset = yOffset * framebuffer->fb_stride * bpp; + const u32 mirrorMask = 0x00600000; u32 addr = address & 0x3FFFFFFF; u32 texaddr = entry->addr + texaddrOffset; @@ -908,12 +912,12 @@ void TextureCacheCommon::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFram nextNeedsRebuild_ = false; } -bool TextureCacheCommon::SetOffsetTexture(u32 offset) { +bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) { if (!framebufferManager_->UseBufferedRendering()) { return false; } u32 texaddr = gstate.getTextureAddress(0); - if (!Memory::IsValidAddress(texaddr) || !Memory::IsValidAddress(texaddr + offset)) { + if (!Memory::IsValidAddress(texaddr)) { return false; } @@ -928,7 +932,7 @@ bool TextureCacheCommon::SetOffsetTexture(u32 offset) { bool success = false; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - if (AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, offset)) { + if (AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, yOffset)) { success = true; } } diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index b349a475a2..8af38798ff 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -187,7 +187,7 @@ public: void SetTexture(bool force = false); void ApplyTexture(); - bool SetOffsetTexture(u32 offset); + bool SetOffsetTexture(u32 yOffset); void Invalidate(u32 addr, int size, GPUInvalidationType type); void InvalidateAll(GPUInvalidationType type); void ClearNextFrame(); @@ -248,7 +248,7 @@ protected: void UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key); // Used by D3D11 and Vulkan. 
void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode); - bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0); + bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 yOffset = 0); void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo); void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo); void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 70abe87bef..417153fae3 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -574,6 +574,10 @@ rotateVBO: dec_->VertexType(), inds, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(), maxIndex, drawBuffer, numTrans, drawIndexed, &params, &result); + // We have an offset texture to apply. + if (result.textureChanged) + textureCache_->ApplyTexture(); + ApplyDrawStateLate(result.setStencil, result.stencilValue); LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, lastVType_, framebufferManager_->UseBufferedRendering());