diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index a5a7d81fc3..28bc2413b2 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -20,6 +20,7 @@ #include "Core/Config.h" #include "Core/CoreParameter.h" #include "Core/Reporting.h" +#include "Core/ELF/ParamSFO.h" #include "Core/System.h" #include "GPU/Common/FramebufferCommon.h" #include "GPU/GPUInterface.h" @@ -41,6 +42,21 @@ FramebufferManagerCommon::FramebufferManagerCommon() : FramebufferManagerCommon::~FramebufferManagerCommon() { } +void FramebufferManagerCommon::Init() { + + const std::string gameId = g_paramSFO.GetValueString("DISC_ID"); + // This applies a hack to Dangan Ronpa, its demo, and its sequel. + // The game draws solid colors to a small framebuffer, and then reads this directly in VRAM. + // We force this framebuffer to 1x and force download it automatically. + hackForce04154000Download_ = gameId == "NPJH50631" || gameId == "NPJH50372" || gameId == "NPJH90164" || gameId == "NPJH50515"; + + // And an initial clear. We don't clear per frame as the games are supposed to handle that + // by themselves. + ClearBuffer(); + + BeginFrame(); +} + void FramebufferManagerCommon::BeginFrame() { DecimateFBOs(); currentRenderVfb_ = 0; @@ -387,3 +403,331 @@ void FramebufferManagerCommon::DoSetRenderFrameBuffer() { gstate_c.curRTRenderWidth = vfb->renderWidth; gstate_c.curRTRenderHeight = vfb->renderHeight; } + +void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { + addr &= ~0x40000000; + // TODO: Could go through all FBOs, but probably not important? + // TODO: Could also check for inner changes, but video is most important. + bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); + if (isDisplayBuf || safe) { + // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. 
+ if (!Memory::IsValidAddress(displayFramebufPtr_)) + return; + + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + if (MaskedEqual(vfb->fb_address, addr)) { + FlushBeforeCopy(); + + if (useBufferedRendering_ && vfb->fbo) { + DisableState(); + GEBufferFormat fmt = vfb->format; + if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) { + // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. + fmt = displayFormat_; + } + DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); + SetColorUpdated(vfb); + } else { + INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); + DestroyFramebuf(vfb); + vfbs_.erase(vfbs_.begin() + i--); + } + } + } + + RebindFramebuffer(); + } +} + +bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) { + if (updateVRAM_ || size == 0) { + return false; + } + + dst &= 0x3FFFFFFF; + src &= 0x3FFFFFFF; + + VirtualFramebuffer *dstBuffer = 0; + VirtualFramebuffer *srcBuffer = 0; + u32 dstY = (u32)-1; + u32 dstH = 0; + u32 srcY = (u32)-1; + u32 srcH = 0; + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + const u32 vfb_size = FramebufferByteSize(vfb); + const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; + const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; + const int vfb_byteWidth = vfb->width * vfb_bpp; + + if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { + const u32 offset = dst - vfb_address; + const u32 yOffset = offset / vfb_byteStride; + if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) { + dstBuffer = vfb; + dstY = yOffset; + dstH = size == vfb_byteWidth ? 
1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); + } + } + + if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { + const u32 offset = src - vfb_address; + const u32 yOffset = offset / vfb_byteStride; + if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) { + srcBuffer = vfb; + srcY = yOffset; + srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); + } + } + } + + if (srcBuffer && srcY == 0 && srcH == srcBuffer->height && !dstBuffer) { + // MotoGP workaround - it copies a framebuffer to memory and then displays it. + // TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing. + // Unfortunately, that would force 1x render resolution. + if (Memory::IsRAMAddress(dst)) { + knownFramebufferRAMCopies_.insert(std::pair(src, dst)); + } + } + + if (!useBufferedRendering_) { + // If we're copying into a recently used display buf, it's probably destined for the screen. + if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) { + return false; + } + } + + if (dstBuffer && srcBuffer && !isMemset) { + if (srcBuffer == dstBuffer) { + WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); + } else { + WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); + // Just do the blit! 
+ if (g_Config.bBlockTransferGPU) { + BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); + SetColorUpdated(dstBuffer); + RebindFramebuffer(); + } + } + return false; + } else if (dstBuffer) { + WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + const u8 *srcBase = Memory::GetPointerUnchecked(src); + DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); + SetColorUpdated(dstBuffer); + RebindFramebuffer(); + // This is a memcpy, let's still copy just in case. + return false; + } + return false; + } else if (srcBuffer) { + WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); + FlushBeforeCopy(); + if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { + WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); + } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { + ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH); + } + return false; + } else { + return false; + } +} + +void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const { + u32 dstYOffset = -1; + u32 dstXOffset = -1; + u32 srcYOffset = -1; + u32 srcXOffset = -1; + int width = srcWidth; + int height = srcHeight; + + dstBasePtr &= 0x3FFFFFFF; + srcBasePtr &= 0x3FFFFFFF; + + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + const u32 vfb_size = FramebufferByteSize(vfb); + const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 
4 : 2; + const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; + const u32 vfb_byteWidth = vfb->width * vfb_bpp; + + // These heuristics are a bit annoying. + // The goal is to avoid using GPU block transfers for things that ought to be memory. + // Maybe we should even check for textures at these places instead? + + if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) { + const u32 byteOffset = dstBasePtr - vfb_address; + const u32 byteStride = dstStride * bpp; + const u32 yOffset = byteOffset / byteStride; + // Some games use mismatching bitdepths. But make sure the stride matches. + // If it doesn't, generally this means we detected the framebuffer with too large a height. + bool match = yOffset < dstYOffset; + if (match && vfb_byteStride != byteStride) { + // Grand Knights History copies with a mismatching stride but a full line at a time. + // Makes it hard to detect the wrong transfers in e.g. God of War. + if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { + match = false; + } else { + dstWidth = byteStride * height / vfb_bpp; + dstHeight = 1; + } + } else if (match) { + dstWidth = width; + dstHeight = height; + } + if (match) { + dstYOffset = yOffset; + dstXOffset = (byteOffset / bpp) % dstStride; + dstBuffer = vfb; + } + } + if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) { + const u32 byteOffset = srcBasePtr - vfb_address; + const u32 byteStride = srcStride * bpp; + const u32 yOffset = byteOffset / byteStride; + bool match = yOffset < srcYOffset; + if (match && vfb_byteStride != byteStride) { + if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { + match = false; + } else { + srcWidth = byteStride * height / vfb_bpp; + srcHeight = 1; + } + } else if (match) { + srcWidth = width; + srcHeight = height; + } + if (match) { + srcYOffset = yOffset; + srcXOffset = (byteOffset / bpp) % srcStride; + srcBuffer = 
vfb; + } + } + } + + if (dstYOffset != (u32)-1) { + dstY += dstYOffset; + dstX += dstXOffset; + } + if (srcYOffset != (u32)-1) { + srcY += srcYOffset; + srcX += srcXOffset; + } +} + +bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { + if (!useBufferedRendering_ || updateVRAM_) { + return false; + } + + // Skip checking if there's no framebuffers in that area. + if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) { + return false; + } + + VirtualFramebuffer *dstBuffer = 0; + VirtualFramebuffer *srcBuffer = 0; + int srcWidth = width; + int srcHeight = height; + int dstWidth = width; + int dstHeight = height; + FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); + + if (dstBuffer && srcBuffer) { + if (srcBuffer == dstBuffer) { + if (srcX != dstX || srcY != dstY) { + WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); + RebindFramebuffer(); + SetColorUpdated(dstBuffer); + return true; + } + } else { + // Ignore, nothing to do. Tales of Phantasia X does this by accident. + if (g_Config.bBlockTransferGPU) { + return true; + } + } + } else { + WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); + // Just do the blit! + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); + RebindFramebuffer(); + SetColorUpdated(dstBuffer); + return true; // No need to actually do the memory copy behind, probably. 
+ } + } + return false; + } else if (dstBuffer) { + // Here we should just draw the pixels into the buffer. Copy first. + return false; + } else if (srcBuffer) { + WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr); + FlushBeforeCopy(); + if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { + const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2; + const float srcXFactor = (float)bpp / srcBpp; + if (srcHeight <= 0 || srcY + srcHeight > srcBuffer->bufferHeight) { + WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); + } else { + ReadFramebufferToMemory(srcBuffer, true, static_cast(srcX * srcXFactor), srcY, static_cast(srcWidth * srcXFactor), srcHeight); + } + } + return false; // Let the bit copy happen + } else { + return false; + } +} + +void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { + // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to + // the backbuffer. Detect this and have the framebuffermanager draw the pixels. + + u32 backBuffer = PrevDisplayFramebufAddr(); + u32 displayBuffer = DisplayFramebufAddr(); + + // TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy? 
+ if (((backBuffer != 0 && dstBasePtr == backBuffer) || + (displayBuffer != 0 && dstBasePtr == displayBuffer)) && + dstStride == 512 && height == 272 && !useBufferedRendering_) { + FlushBeforeCopy(); + DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false); + } + + if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) { + VirtualFramebuffer *dstBuffer = 0; + VirtualFramebuffer *srcBuffer = 0; + int srcWidth = width; + int srcHeight = height; + int dstWidth = width; + int dstHeight = height; + FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); + + if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) { + return; + } + + if (dstBuffer && !srcBuffer) { + WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr); + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp; + int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 
4 : 2; + float dstXFactor = (float)bpp / dstBpp; + DrawPixels(dstBuffer, static_cast(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast(srcStride * dstXFactor), static_cast(dstWidth * dstXFactor), dstHeight); + SetColorUpdated(dstBuffer); + RebindFramebuffer(); + } + } + } +} diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 5dfb24f145..b71ca1c807 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include "Common/CommonTypes.h" #include "Core/MemMap.h" @@ -93,10 +94,9 @@ public: FramebufferManagerCommon(); virtual ~FramebufferManagerCommon(); + virtual void Init(); void BeginFrame(); - - virtual bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false) = 0; - virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0; + void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); void DoSetRenderFrameBuffer(); void SetRenderFrameBuffer() { @@ -110,11 +110,24 @@ public: } DoSetRenderFrameBuffer(); } + virtual void RebindFramebuffer() = 0; + + bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); + void UpdateFromMemory(u32 addr, int size, bool safe); + virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0; + // Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it. + // In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless + // read framebuffers is on, in which case this should always return false). 
+ bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); + void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); + + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0; + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; + virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; + virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0; size_t NumVFBs() const { return vfbs_.size(); } - void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); - u32 PrevDisplayFramebufAddr() { return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0; } @@ -122,17 +135,6 @@ public: return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0; } - void SetDepthUpdated() { - if (currentRenderVfb_) { - currentRenderVfb_->depthUpdated = true; - } - } - void SetColorUpdated() { - if (currentRenderVfb_) { - SetColorUpdated(currentRenderVfb_); - } - } - bool MayIntersectFramebuffer(u32 start) { // Clear the cache/kernel bits. start = start & 0x3FFFFFFF; @@ -158,12 +160,31 @@ public: int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; } GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? 
currentRenderVfb_->format : displayFormat_; } + void SetDepthUpdated() { + if (currentRenderVfb_) { + currentRenderVfb_->depthUpdated = true; + } + } + void SetColorUpdated() { + if (currentRenderVfb_) { + SetColorUpdated(currentRenderVfb_); + } + } + protected: + virtual void DisableState() = 0; + virtual void ClearBuffer() = 0; + virtual void ClearDepthBuffer() = 0; + virtual void FlushBeforeCopy() = 0; + virtual void DecimateFBOs() = 0; + + // Used by ReadFramebufferToMemory and later framebuffer block copies + virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) = 0; + void EstimateDrawingSize(int &drawing_width, int &drawing_height); u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const; static bool MaskedEqual(u32 addr1, u32 addr2); - virtual void DecimateFBOs() = 0; virtual void DestroyFramebuf(VirtualFramebuffer *vfb) = 0; virtual void ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force = false) = 0; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) = 0; @@ -171,6 +192,7 @@ protected: virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) = 0; bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const; + void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const; void SetColorUpdated(VirtualFramebuffer *dstBuffer) { dstBuffer->memoryUpdated = false; @@ -182,10 +204,6 @@ protected: dstBuffer->reallyDirtyAfterDisplay = true; } - virtual void DisableState() = 0; - virtual void ClearBuffer() = 0; - virtual void ClearDepthBuffer() = 0; - u32 displayFramebufPtr_; u32 displayStride_; GEBufferFormat displayFormat_; @@ -204,6 +222,7 @@ protected: bool 
updateVRAM_; std::vector vfbs_; + std::set> knownFramebufferRAMCopies_; bool hackForce04154000Download_; diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 9d39dd7ae7..31b020805c 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -395,13 +395,21 @@ void SoftwareTransform( return; } - if (gstate_c.flipTexture && maxIndex >= 2) { + // This means we're using a framebuffer (and one that isn't big enough.) + if (gstate_c.curTextureHeight < (u32)gstate.getTextureHeight(0) && maxIndex >= 2) { // Even if not rectangles, this will detect if either of the first two are outside the framebuffer. // HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines, // while still keeping BOF working (see below). const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel. - const bool tlOutside = transformed[0].v < -invTexH && transformed[0].v > 1.0f - heightFactor; - const bool brOutside = transformed[1].v < -invTexH && transformed[1].v > 1.0f - heightFactor; + bool tlOutside; + bool brOutside; + if (gstate_c.flipTexture) { + tlOutside = transformed[0].v < -invTexH && transformed[0].v > 1.0f - heightFactor; + brOutside = transformed[1].v < -invTexH && transformed[1].v > 1.0f - heightFactor; + } else { + tlOutside = transformed[0].v > invTexH && transformed[0].v > heightFactor - 1.0f; + brOutside = transformed[1].v > invTexH && transformed[1].v > heightFactor - 1.0f; + } if (tlOutside || brOutside) { // Okay, so we're texturing from outside the framebuffer, but inside the texture height. // Breath of Fire 3 does this to access a render surface at an offset. @@ -420,9 +428,13 @@ void SoftwareTransform( for (int index = 0; index < maxIndex; ++index) { transformed[index].u *= widthFactor / oldWidthFactor; // Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO. 
- transformed[index].v = (1.0f - transformed[index].v) / oldHeightFactor; - transformed[index].v -= yDiff; - transformed[index].v = 1.0f - (transformed[index].v * heightFactor); + if (gstate_c.flipTexture) { + transformed[index].v = (1.0f - transformed[index].v) / oldHeightFactor; + transformed[index].v -= yDiff; + transformed[index].v = 1.0f - (transformed[index].v * heightFactor); + } else { + transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor; + } } } } diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 6021d29f74..7572e6b8a4 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -28,9 +28,11 @@ #include "helper/fbo.h" #include "GPU/Common/FramebufferCommon.h" +#include "GPU/Common/TextureDecoder.h" #include "GPU/Directx9/FramebufferDX9.h" -#include "GPU/Directx9/TextureCacheDX9.h" #include "GPU/Directx9/ShaderManagerDX9.h" +#include "GPU/Directx9/TextureCacheDX9.h" +#include "GPU/Directx9/TransformPipelineDX9.h" #include @@ -47,7 +49,15 @@ namespace DX9 { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } - static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format); + inline u16 BGRA8888toRGB565(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); + } + + inline u16 BGRA8888toRGBA4444(u32 px) { + return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); + } + + static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -112,31 +122,20 @@ namespace DX9 { FramebufferManagerDX9::FramebufferManagerDX9() : drawPixelsTex_(0), - drawPixelsTexFormat_(GE_FORMAT_INVALID), - convBuf(0) - { - // And an initial clear. 
We don't clear per frame as the games are supposed to handle that - // by themselves. - ClearBuffer(); - // TODO: Check / use D3DCAPS2_DYNAMICTEXTURES? - int usage = 0; - D3DPOOL pool = D3DPOOL_MANAGED; - if (pD3DdeviceEx) { - pool = D3DPOOL_DEFAULT; - usage = D3DUSAGE_DYNAMIC; - } - HRESULT hr = pD3Ddevice->CreateTexture(512, 272, 1, usage, D3DFMT(D3DFMT_A8R8G8B8), pool, &drawPixelsTex_, NULL); - if (FAILED(hr)) { - drawPixelsTex_ = nullptr; - ERROR_LOG(G3D, "Failed to create drawpixels texture"); - } - BeginFrame(); + convBuf(0), + gameUsesSequentialCopies_(false) { } FramebufferManagerDX9::~FramebufferManagerDX9() { - if(drawPixelsTex_) { + if (drawPixelsTex_) { drawPixelsTex_->Release(); } + for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) { + fbo_destroy(it->second.fbo); + } + for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) { + it->second.surface->Release(); + } delete [] convBuf; } @@ -150,15 +149,40 @@ namespace DX9 { *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x001f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; } - static inline u32 ABGR2RGBA(u32 src) { - return (src >> 8) | (src << 24); + // TODO: Swizzle the texture access instead. + static inline u32 RGBA2BGRA(u32 src) { + const u32 r = (src & 0x000000FF) << 16; + const u32 ga = src & 0xFF00FF00; + const u32 b = (src & 0x00FF0000) >> 16; + return r | ga | b; } void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) { - u8 *convBuf = NULL; D3DLOCKED_RECT rect; + // TODO: Check / use D3DCAPS2_DYNAMICTEXTURES? 
+ if (drawPixelsTex_ && (drawPixelsTexW_ != width || drawPixelsTexH_ != height)) { + drawPixelsTex_->Release(); + drawPixelsTex_ = nullptr; + } + + if (!drawPixelsTex_) { + int usage = 0; + D3DPOOL pool = D3DPOOL_MANAGED; + if (pD3DdeviceEx) { + pool = D3DPOOL_DEFAULT; + usage = D3DUSAGE_DYNAMIC; + } + HRESULT hr = pD3Ddevice->CreateTexture(width, height, 1, usage, D3DFMT(D3DFMT_A8R8G8B8), pool, &drawPixelsTex_, NULL); + if (FAILED(hr)) { + drawPixelsTex_ = nullptr; + ERROR_LOG(G3D, "Failed to create drawpixels texture"); + } + drawPixelsTexW_ = width; + drawPixelsTexH_ = height; + } + if (!drawPixelsTex_) { return; } @@ -167,18 +191,18 @@ namespace DX9 { - convBuf = (u8*)rect.pBits; + u8 *convBuf = (u8*)rect.pBits; // Local on purpose: the convBuf member is delete[]'d by the destructor and must never point at locked texture memory - // Final format is ARGB(directx) + // Final format is BGRA(directx) // TODO: We can just change the texture format and flip some bits around instead of this. if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) { - for (int y = 0; y < 272; y++) { + for (int y = 0; y < height; y++) { switch (srcPixelFormat) { // not tested case GE_FORMAT_565: { - const u16 *src = (const u16 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) { + const u16_le *src = (const u16_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); + for (int x = 0; x < width; x++) { u16_le col0 = src[x+0]; ARGB8From565(col0, &dst[x + 0]); } @@ -187,20 +211,19 @@ namespace DX9 { // faster case GE_FORMAT_5551: { - const u16 *src = (const u16 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) { + const u16_le *src = (const u16_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); + for (int x = 0; x < width; x++) { u16_le col0 = src[x+0]; ARGB8From5551(col0, &dst[x + 0]); } } break; - // not tested case GE_FORMAT_4444: { - const u16 *src = (const u16 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) + 
const u16_le *src = (const u16_le *)srcPixels + srcStride * y; + u8 *dst = (u8 *)(convBuf + rect.Pitch * y); + for (int x = 0; x < width; x++) { u16_le col = src[x]; dst[x * 4 + 0] = (col >> 12) << 4; @@ -213,23 +236,23 @@ namespace DX9 { case GE_FORMAT_8888: { - const u32 *src = (const u32 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) + const u32_le *src = (const u32_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); + for (int x = 0; x < width; x++) { - dst[x] = ABGR2RGBA(src[x]); + dst[x] = RGBA2BGRA(src[x]); } } break; } } } else { - for (int y = 0; y < 272; y++) { - const u32 *src = (const u32 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 512; x++) + for (int y = 0; y < height; y++) { + const u32_le *src = (const u32_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); + for (int x = 0; x < width; x++) { - dst[x] = ABGR2RGBA(src[x]); + dst[x] = RGBA2BGRA(src[x]); } } } @@ -245,7 +268,8 @@ namespace DX9 { dxstate.viewport.set(0, 0, vfb->renderWidth, vfb->renderHeight); MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); DisableState(); - DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); + DrawActiveTexture(drawPixelsTex_, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); + textureCache_->ForgetLastTexture(); } void FramebufferManagerDX9::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { @@ -281,10 +305,6 @@ namespace DX9 { } // TODO: StretchRect instead? 
- if (tex) { - pD3Ddevice->SetTexture(0, tex); - } - float coord[20] = { x,y,0, u0,v0, x+w,y,0, u1,v0, @@ -304,10 +324,14 @@ namespace DX9 { pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl); pD3Ddevice->SetPixelShader(pFramebufferPixelShader); pD3Ddevice->SetVertexShader(pFramebufferVertexShader); + shaderManager_->DirtyLastShader(); if (tex != NULL) { pD3Ddevice->SetTexture(0, tex); } - pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); + HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); + if (FAILED(hr)) { + ERROR_LOG_REPORT(G3D, "DrawActiveTexture() failed: %08x", hr); + } } void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebuffer *v) { @@ -330,6 +354,14 @@ namespace DX9 { delete v; } + void FramebufferManagerDX9::RebindFramebuffer() { + if (currentRenderVfb_ && currentRenderVfb_->fbo) { + fbo_bind_as_render_target(currentRenderVfb_->fbo); + } else { + fbo_unbind(); + } + } + void FramebufferManagerDX9::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force) { float renderWidthFactor = (float)vfb->renderWidth / (float)vfb->bufferWidth; float renderHeightFactor = (float)vfb->renderHeight / (float)vfb->bufferHeight; @@ -391,9 +423,10 @@ namespace DX9 { if (old.fbo) { INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); if (vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); ClearBuffer(); if (!g_Config.bDisableSlowFramebufEffects) { - BlitFramebuffer_(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); + BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); } } fbo_destroy(old.fbo); @@ -422,12 +455,15 @@ namespace DX9 { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); } + if (gstate_c.curRTRenderWidth != vfb->renderWidth || 
gstate_c.curRTRenderHeight != vfb->renderHeight) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + } } void FramebufferManagerDX9::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) { if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) { - // TODO - //ReadFramebufferToMemory(vfb, true, 0, 0, vfb->width, vfb->height); + ReadFramebufferToMemory(vfb, true, 0, 0, vfb->width, vfb->height); } textureCache_->ForgetLastTexture(); @@ -471,6 +507,10 @@ namespace DX9 { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); } + if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + } } void FramebufferManagerDX9::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) { @@ -486,6 +526,52 @@ namespace DX9 { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); } + if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + } + } + + FBO *FramebufferManagerDX9::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { + u64 key = ((u64)depth << 32) | ((u64)w << 16) | h; // Widen w before shifting: a promoted int shift goes negative for w >= 0x8000 and sign-extends over the depth bits + auto it = tempFBOs_.find(key); + if (it != tempFBOs_.end()) { + it->second.last_frame_used = gpuStats.numFlips; + return it->second.fbo; + } + + textureCache_->ForgetLastTexture(); + FBO *fbo = fbo_create(w, h, 1, false, depth); + if (!fbo) + return fbo; + fbo_bind_as_render_target(fbo); + ClearBuffer(); + const TempFBO info = {fbo, gpuStats.numFlips}; + tempFBOs_[key] = info; + return fbo; + } + + LPDIRECT3DSURFACE9 
FramebufferManagerDX9::GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface) { + D3DSURFACE_DESC desc; + similarSurface->GetDesc(&desc); + + u64 key = ((u64)desc.Format << 32) | (desc.Width << 16) | desc.Height; + auto it = offscreenSurfaces_.find(key); + if (it != offscreenSurfaces_.end()) { + it->second.last_frame_used = gpuStats.numFlips; + return it->second.surface; + } + + textureCache_->ForgetLastTexture(); + LPDIRECT3DSURFACE9 offscreen = nullptr; + HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); + if (FAILED(hr) || !offscreen) { + ERROR_LOG_REPORT(G3D, "Unable to create offscreen surface %dx%d @%d", desc.Width, desc.Height, desc.Format); + return nullptr; + } + const OffscreenSurface info = {offscreen, gpuStats.numFlips}; + offscreenSurfaces_[key] = info; + return offscreen; } void FramebufferManagerDX9::CopyDisplayToOutput() { @@ -542,7 +628,7 @@ namespace DX9 { // The game is displaying something directly from RAM. In GTA, it's decoded video. 
// First check that it's not a known RAM copy of a VRAM framebuffer though, as in MotoGP - for (auto iter = knownFramebufferCopies_.begin(); iter != knownFramebufferCopies_.end(); ++iter) { + for (auto iter = knownFramebufferRAMCopies_.begin(); iter != knownFramebufferRAMCopies_.end(); ++iter) { if (iter->second == displayFramebufPtr_) { vfb = GetVFBAt(iter->first); } @@ -595,18 +681,26 @@ namespace DX9 { const float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; if (1) { - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); - // These are in the output display coordinates - if (g_Config.iBufFilter == SCALE_LINEAR) { - dxstate.texMagFilter.set(D3DTEXF_LINEAR); - dxstate.texMinFilter.set(D3DTEXF_LINEAR); - } else { - dxstate.texMagFilter.set(D3DTEXF_POINT); - dxstate.texMinFilter.set(D3DTEXF_POINT); + const u32 rw = PSP_CoreParameter().pixelWidth; + const u32 rh = PSP_CoreParameter().pixelHeight; + const RECT srcRect = {(LONG)(u0 * vfb->renderWidth), (LONG)(v0 * vfb->renderHeight), (LONG)(u1 * vfb->renderWidth), (LONG)(v1 * vfb->renderHeight)}; + const RECT dstRect = {x * rw / w, y * rh / h, (x + w) * rw / w, (y + h) * rh / h}; + HRESULT hr = fbo_blit_color(vfb->fbo, &srcRect, nullptr, &dstRect, g_Config.iBufFilter == SCALE_LINEAR ? 
D3DTEXF_LINEAR : D3DTEXF_POINT); + if (FAILED(hr)) { + ERROR_LOG_REPORT(G3D, "fbo_blit_color failed on display: %08x", hr); + dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + // These are in the output display coordinates + if (g_Config.iBufFilter == SCALE_LINEAR) { + dxstate.texMagFilter.set(D3DTEXF_LINEAR); + dxstate.texMinFilter.set(D3DTEXF_LINEAR); + } else { + dxstate.texMagFilter.set(D3DTEXF_POINT); + dxstate.texMinFilter.set(D3DTEXF_POINT); + } + dxstate.texMipFilter.set(D3DTEXF_NONE); + dxstate.texMipLodBias.set(0); + DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, u0, v0, u1, v1); } - dxstate.texMipFilter.set(D3DTEXF_NONE); - dxstate.texMipLodBias.set(0); - DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, u0, v0, u1, v1); } /* else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) { @@ -640,14 +734,14 @@ namespace DX9 { } } - void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync) { + void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { #if 0 if (sync) { PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update } #endif - if(vfb) { + if (vfb) { // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. // For now we'll keep these on the same struct as the ones that can get displayed // (and blatantly copy work already done above while at it). 
@@ -682,26 +776,15 @@ namespace DX9 { nvfb->bufferWidth = vfb->bufferWidth; nvfb->bufferHeight = vfb->bufferHeight; nvfb->format = vfb->format; + nvfb->drawnWidth = vfb->drawnWidth; + nvfb->drawnHeight = vfb->drawnHeight; + nvfb->drawnFormat = vfb->format; nvfb->usageFlags = FB_USAGE_RENDERTARGET; nvfb->dirtyAfterDisplay = true; - // When updating VRAM, it need to be exact format. - switch (vfb->format) { - case GE_FORMAT_4444: - nvfb->colorDepth = FBO_4444; - break; - case GE_FORMAT_5551: - nvfb->colorDepth = FBO_5551; - break; - case GE_FORMAT_565: - nvfb->colorDepth = FBO_565; - break; - case GE_FORMAT_8888: - default: - nvfb->colorDepth = FBO_8888; - break; - } + nvfb->colorDepth = FBO_8888; + textureCache_->ForgetLastTexture(); nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth); if (!(nvfb->fbo)) { ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); @@ -710,7 +793,7 @@ namespace DX9 { nvfb->last_frame_render = gpuStats.numFlips; bvfbs_.push_back(nvfb); - fbo_bind_as_render_target(nvfb->fbo); + fbo_bind_as_render_target(nvfb->fbo); ClearBuffer(); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; @@ -733,142 +816,208 @@ namespace DX9 { #endif } - vfb->memoryUpdated = true; - BlitFramebuffer_(nvfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, false); - -#if 0 -#ifdef USING_GLES2 - PackFramebufferSync_(nvfb); // synchronous glReadPixels -#else - if (gl_extensions.PBO_ARB || !gl_extensions.ATIClampBug) { - if (!sync) { - PackFramebufferAsync_(nvfb); // asynchronous glReadPixels using PBOs - } else { - PackFramebufferSync_(nvfb); // synchronous glReadPixels + if (gameUsesSequentialCopies_) { + // Ignore the x/y/etc., read the entire thing. 
+ x = 0; + y = 0; + w = vfb->width; + h = vfb->height; + } + if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { + vfb->memoryUpdated = true; + } else { + const static int FREQUENT_SEQUENTIAL_COPIES = 3; + static int frameLastCopy = 0; + static u32 bufferLastCopy = 0; + static int copiesThisFrame = 0; + if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) { + frameLastCopy = gpuStats.numFlips; + bufferLastCopy = vfb->fb_address; + copiesThisFrame = 0; + } + if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) { + gameUsesSequentialCopies_ = true; } } -#endif -#endif + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, false); + + PackFramebufferDirectx9_(nvfb, x, y, w, h); + RebindFramebuffer(); } } - void FramebufferManagerDX9::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { + void FramebufferManagerDX9::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. fbo_unbind(); return; } - fbo_bind_as_render_target(dst->fbo); - dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight); - DisableState(); - - fbo_bind_color_as_texture(src->fbo, 0); - - float srcXFactor = 1.0f; - float srcYFactor = 1.0f; + float srcXFactor = flip ? 1.0f : (float)src->renderWidth / (float)src->bufferWidth; + float srcYFactor = flip ? 1.0f : (float)src->renderHeight / (float)src->bufferHeight; const int srcBpp = src->format == GE_FORMAT_8888 ? 
4 : 2; if (srcBpp != bpp && bpp != 0) { srcXFactor = (srcXFactor * bpp) / srcBpp; } int srcX1 = srcX * srcXFactor; int srcX2 = (srcX + w) * srcXFactor; - int srcY2 = src->renderHeight - (h + srcY) * srcYFactor; - int srcY1 = srcY2 + h * srcYFactor; + int srcY1 = srcY * srcYFactor; + int srcY2 = (srcY + h) * srcYFactor; - float dstXFactor = 1.0f; - float dstYFactor = 1.0f; + float dstXFactor = flip ? 1.0f : (float)dst->renderWidth / (float)dst->bufferWidth; + float dstYFactor = flip ? 1.0f : (float)dst->renderHeight / (float)dst->bufferHeight; const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2; if (dstBpp != bpp && bpp != 0) { dstXFactor = (dstXFactor * bpp) / dstBpp; } int dstX1 = dstX * dstXFactor; int dstX2 = (dstX + w) * dstXFactor; - int dstY2 = dst->renderHeight - (h + dstY) * dstYFactor; - int dstY1 = dstY2 + h * dstYFactor; + int dstY1 = dstY * dstYFactor; + int dstY2 = (dstY + h) * dstYFactor; - float srcW = src->bufferWidth; - float srcH = src->bufferHeight; - DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, !flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); - pD3Ddevice->SetTexture(0, NULL); - textureCache_->ForgetLastTexture(); - dxstate.viewport.restore(); + if (flip) { + fbo_bind_as_render_target(dst->fbo); + dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight); + DisableState(); - fbo_unbind(); + fbo_bind_color_as_texture(src->fbo, 0); + + float srcW = src->bufferWidth; + float srcH = src->bufferHeight; + DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); + pD3Ddevice->SetTexture(0, NULL); + textureCache_->ForgetLastTexture(); + dxstate.viewport.restore(); + + RebindFramebuffer(); + } else { + LPDIRECT3DSURFACE9 srcSurf = fbo_get_color_for_read(src->fbo); + LPDIRECT3DSURFACE9 dstSurf = fbo_get_color_for_write(dst->fbo); + RECT srcRect = {srcX1, srcY1, srcX2, srcY2}; + RECT 
dstRect = {dstX1, dstY1, dstX2, dstY2}; + + D3DSURFACE_DESC desc; + srcSurf->GetDesc(&desc); + srcRect.right = std::min(srcRect.right, (LONG)desc.Width); + srcRect.bottom = std::min(srcRect.bottom, (LONG)desc.Height); + + dstSurf->GetDesc(&desc); + dstRect.right = std::min(dstRect.right, (LONG)desc.Width); + dstRect.bottom = std::min(dstRect.bottom, (LONG)desc.Height); + + // Direct3D 9 doesn't support rect -> self, so bounce through a temp FBO when one is available. + FBO *srcFBO = src->fbo; + if (src == dst) { + FBO *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (FBOColorDepth)src->colorDepth); + HRESULT hr = tempFBO ? fbo_blit_color(src->fbo, &srcRect, tempFBO, &srcRect, D3DTEXF_POINT) : E_FAIL; + if (SUCCEEDED(hr)) { + srcFBO = tempFBO; + } + } + + HRESULT hr = fbo_blit_color(srcFBO, &srcRect, dst->fbo, &dstRect, D3DTEXF_POINT); + if (FAILED(hr)) { + ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", hr, src->fb_address, dst->fb_address); + } + } } // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) - void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) { - if(format == GE_FORMAT_8888) { - if(src == dst) { + void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { + // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. 
+ const u32 *src32 = (const u32 *)src; + + if (format == GE_FORMAT_8888) { + u32 *dst32 = (u32 *)dst; + if (src == dst) { return; - } else { // Here lets assume they don't intersect - memcpy(dst, src, stride * height * 4); + } else { + for (u32 y = 0; y < height; ++y) { + ConvertBGRA8888ToRGBA8888(dst32, src32, width); + src32 += srcStride; + dst32 += dstStride; + } } - } else { // But here it shouldn't matter if they do - int size = height * stride; - const u32 *src32 = (const u32 *)src; + } else { + // But here it shouldn't matter if they do intersect u16 *dst16 = (u16 *)dst; switch (format) { case GE_FORMAT_565: // BGR 565 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGB565(src32[i]); + for (u32 y = 0; y < height; ++y) { + for (u32 x = 0; x < width; ++x) { + dst16[x] = BGRA8888toRGB565(src32[x]); + } + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_5551: // ABGR 1555 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA5551(src32[i]); + for (u32 y = 0; y < height; ++y) { + ConvertBGRA8888ToRGBA5551(dst16, src32, width); + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_4444: // ABGR 4444 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA4444(src32[i]); + for (u32 y = 0; y < height; ++y) { + for (u32 x = 0; x < width; ++x) { + dst16[x] = BGRA8888toRGBA4444(src32[x]); + } + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_8888: + case GE_FORMAT_INVALID: // Not possible. 
break; - default: - break; } } } - void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) { - if (vfb->fbo) { - fbo_bind_for_read(vfb->fbo); - } else { - ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0"); + void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { + if (!vfb->fbo) { + ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferDirectx9_: vfb->fbo == 0"); fbo_unbind(); return; } - // Pixel size always 4 here because we always request RGBA8888 - size_t bufSize = vfb->fb_stride * vfb->height * 4; - u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = (0x04000000) | vfb->fb_address; + const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; - u8 *packed = 0; - if(vfb->format == GE_FORMAT_8888) { - packed = (u8 *)Memory::GetPointer(fb_address); - } else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address - packed = (u8 *)malloc(bufSize * sizeof(u8)); - } + // We always need to convert from the framebuffer native format. + // Right now that's always 8888. 
+ DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address); - if(packed) { - DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x", - (u32)bufSize, packed, fb_address); + LPDIRECT3DSURFACE9 renderTarget = fbo_get_color_for_read(vfb->fbo); + D3DSURFACE_DESC desc; + renderTarget->GetDesc(&desc); - // Resolve(packed, vfb); - - if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert - ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); - free(packed); + LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget); + if (offscreen) { + HRESULT hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); + if (SUCCEEDED(hr)) { + D3DLOCKED_RECT locked; + u32 widthFactor = vfb->renderWidth / vfb->bufferWidth; + u32 heightFactor = vfb->renderHeight / vfb->bufferHeight; + RECT rect = {x * widthFactor, y * heightFactor, (x + w) * widthFactor, (y + h) * heightFactor}; + hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY); + if (SUCCEEDED(hr)) { + // TODO: Handle the other formats? We don't currently create them, I think. + const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp; + // Pixel size always 4 here because we always request BGRA8888. 
+ ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format); + offscreen->UnlockRect(); + } else { + ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight); + } + } else { + ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address); } } - - fbo_unbind(); } + void FramebufferManagerDX9::EndFrame() { if (resized_) { DestroyAllFBOs(); @@ -905,19 +1054,6 @@ namespace DX9 { return list; } - // MotoGP workaround - bool FramebufferManagerDX9::NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset) { - for (size_t i = 0; i < vfbs_.size(); i++) { - // This size fits for MotoGP. Might want to make this more flexible for other games if they do the same. - if ((vfbs_[i]->fb_address | 0x04000000) == src && size == 512 * 272 * 2) { - // A framebuffer matched! - knownFramebufferCopies_.insert(std::pair(src, dest)); - } - } - // TODO - return false; - } - bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) { // TODO return false; @@ -932,8 +1068,9 @@ namespace DX9 { VirtualFramebuffer *vfb = vfbs_[i]; int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used); - if (updateVram && age == 0 && !vfb->memoryUpdated && vfb == displayFramebuf_) - ReadFramebufferToMemory(vfb); + if (ShouldDownloadFramebuffer(vfb) && age == 0 && !vfb->memoryUpdated) { + ReadFramebufferToMemory(vfb, false, 0, 0, vfb->width, vfb->height); + } if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { continue; @@ -946,6 +1083,26 @@ namespace DX9 { } } + for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) { + int age = frameLastFramebufUsed_ - it->second.last_frame_used; + if (age > FBO_OLD_AGE) { + fbo_destroy(it->second.fbo); + tempFBOs_.erase(it++); + } else { + ++it; 
+ } + } + + for (auto it = offscreenSurfaces_.begin(); it != offscreenSurfaces_.end(); ) { + int age = frameLastFramebufUsed_ - it->second.last_frame_used; + if (age > FBO_OLD_AGE) { + it->second.surface->Release(); + offscreenSurfaces_.erase(it++); + } else { + ++it; + } + } + // Do the same for ReadFramebuffersToMemory's VFBs for (size_t i = 0; i < bvfbs_.size(); ++i) { VirtualFramebuffer *vfb = bvfbs_[i]; @@ -971,45 +1128,30 @@ namespace DX9 { DestroyFramebuf(vfb); } vfbs_.clear(); + + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebuffer *vfb = bvfbs_[i]; + DestroyFramebuf(vfb); + } + bvfbs_.clear(); + + for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) { + fbo_destroy(it->second.fbo); + } + tempFBOs_.clear(); + + for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) { + it->second.surface->Release(); + } + offscreenSurfaces_.clear(); + DisableState(); } - void FramebufferManagerDX9::UpdateFromMemory(u32 addr, int size, bool safe) { - addr &= ~0x40000000; - // TODO: Could go through all FBOs, but probably not important? - // TODO: Could also check for inner changes, but video is most important. - bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); - if (isDisplayBuf || safe) { - // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. - if (!Memory::IsValidAddress(displayFramebufPtr_)) - return; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { - // TODO - //FlushBeforeCopy(); - - if (useBufferedRendering_ && vfb->fbo) { - DisableState(); - GEBufferFormat fmt = vfb->format; - if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) { - // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. 
- fmt = displayFormat_; - } - DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); - SetColorUpdated(vfb); - } else { - INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i--); - } - } - } - - // TODO: RebindFramebuffer(); - fbo_unbind(); - currentRenderVfb_ = 0; - } + void FramebufferManagerDX9::FlushBeforeCopy() { + // Flush anything not yet drawn before blitting, downloading, or uploading. + // This might be a stalled list, or unflushed before a block transfer, etc. + SetRenderFrameBuffer(); + transformDraw_->Flush(); } void FramebufferManagerDX9::Resized() { @@ -1041,9 +1183,8 @@ namespace DX9 { D3DSURFACE_DESC desc; renderTarget->GetDesc(&desc); - LPDIRECT3DSURFACE9 offscreen = nullptr; - hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); - if (!offscreen || !SUCCEEDED(hr)) { + LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget); + if (!offscreen) { renderTarget->Release(); return false; } @@ -1063,7 +1204,6 @@ namespace DX9 { } } - offscreen->Release(); renderTarget->Release(); return success; diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 45c1a0628c..b3ec0bdf54 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -19,6 +19,7 @@ #include #include +#include #include "d3d9.h" @@ -34,15 +35,14 @@ namespace DX9 { -struct GLSLProgram; class TextureCacheDX9; +class TransformDrawEngineDX9; +class ShaderManagerDX9; void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH); -class ShaderManagerDX9; - class FramebufferManagerDX9 : public FramebufferManagerCommon { public: FramebufferManagerDX9(); @@ -54,11 +54,13 @@ public: void SetShaderManager(ShaderManagerDX9 *sm) { shaderManager_ = sm; } + void 
SetTransformDrawEngine(TransformDrawEngineDX9 *td) { + transformDraw_ = td; + } - void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - - void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader); + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) override; void DrawActiveTexture(LPDIRECT3DTEXTURE9 texture, float x, float y, float w, float h, float destW, float destH, bool flip = false, float u0 = 0.0f, float v0 = 0.0f, float u1 = 1.0f, float v1 = 1.0f); @@ -68,13 +70,11 @@ public: void Resized(); void DeviceLost(); void CopyDisplayToOutput(); - void UpdateFromMemory(u32 addr, int size, bool safe); - void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync = true); + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; std::vector GetFramebufferList(); - bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); void DestroyFramebuf(VirtualFramebuffer *vfb); @@ -84,30 +84,37 @@ public: bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); + virtual void RebindFramebuffer() override; + + FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888); + LPDIRECT3DSURFACE9 GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface); + 
protected: virtual void DisableState() override; virtual void ClearBuffer() override; virtual void ClearDepthBuffer() override; + virtual void FlushBeforeCopy() override; + virtual void DecimateFBOs() override; + + // Used by ReadFramebufferToMemory and later framebuffer block copies + virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) override; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; - virtual void DecimateFBOs() override; - private: void CompileDraw2DProgram(); void DestroyDraw2DProgram(); void SetNumExtraFBOs(int num); - // Used by ReadFramebufferToMemory - void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false); - void PackFramebufferDirectx9_(VirtualFramebuffer *vfb); + void PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h); // Used by DrawPixels LPDIRECT3DTEXTURE9 drawPixelsTex_; - GEBufferFormat drawPixelsTexFormat_; + int drawPixelsTexW_; + int drawPixelsTexH_; u8 *convBuf; @@ -115,6 +122,7 @@ private: TextureCacheDX9 *textureCache_; ShaderManagerDX9 *shaderManager_; + TransformDrawEngineDX9 *transformDraw_; bool usePostShader_; bool postShaderAtOutputResolution_; @@ -122,10 +130,20 @@ private: std::vector extraFBOs_; bool resized_; + bool gameUsesSequentialCopies_; + + struct TempFBO { + FBO *fbo; + int last_frame_used; + }; + struct OffscreenSurface { + LPDIRECT3DSURFACE9 surface; + int last_frame_used; + }; std::vector bvfbs_; // blitting FBOs - - std::set> knownFramebufferCopies_; + std::map tempFBOs_; + std::map offscreenSurfaces_; #if 0 AsyncPBO *pixelBufObj_; //this isn't 
that large diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 554e9f93c4..83ddda14f1 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -19,7 +19,9 @@ #include "Common/ChunkFile.h" #include "base/logging.h" +#include "Core/Debugger/Breakpoints.h" #include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" #include "Core/Host.h" #include "Core/Config.h" #include "Core/Reporting.h" @@ -396,8 +398,10 @@ DIRECTX9_GPU::DIRECTX9_GPU() transformDraw_.SetShaderManager(shaderManager_); transformDraw_.SetTextureCache(&textureCache_); transformDraw_.SetFramebufferManager(&framebufferManager_); + framebufferManager_.Init(); framebufferManager_.SetTextureCache(&textureCache_); framebufferManager_.SetShaderManager(shaderManager_); + framebufferManager_.SetTransformDrawEngine(&transformDraw_); textureCache_.SetFramebufferManager(&framebufferManager_); textureCache_.SetShaderManager(shaderManager_); @@ -630,6 +634,18 @@ void DIRECTX9_GPU::ProcessEvent(GPUEvent ev) { InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type); break; + case GPU_EVENT_FB_MEMCPY: + PerformMemoryCopyInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size); + break; + + case GPU_EVENT_FB_MEMSET: + PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size); + break; + + case GPU_EVENT_FB_STENCIL_UPLOAD: + PerformStencilUploadInternal(ev.fb_stencil_upload.dst, ev.fb_stencil_upload.size); + break; + default: GPUCommon::ProcessEvent(ev); } @@ -1806,31 +1822,50 @@ void DIRECTX9_GPU::DoBlockTransfer() { return; } - // Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?) - // Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them - // entirely by walking a couple of pointers... 
- for (int y = 0; y < height; y++) { - const u8 *src = Memory::GetPointerUnchecked(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp); - u8 *dst = Memory::GetPointerUnchecked(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp); - memcpy(dst, src, width * bpp); + // Check that the last address (bottom-right pixel) of both source and dest are valid addresses + + u32 srcLastAddr = srcBasePtr + ((height - 1 + srcY) * srcStride + (srcX + width - 1)) * bpp; + u32 dstLastAddr = dstBasePtr + ((height - 1 + dstY) * dstStride + (dstX + width - 1)) * bpp; + + if (!Memory::IsValidAddress(srcLastAddr)) { + ERROR_LOG_REPORT(G3D, "Bottom-right corner of source of block transfer is at an invalid address: %08x", srcLastAddr); + return; + } + if (!Memory::IsValidAddress(dstLastAddr)) { + ERROR_LOG_REPORT(G3D, "Bottom-right corner of destination of block transfer is at an invalid address: %08x", dstLastAddr); + return; } - // TODO: Notify all overlapping FBOs that they need to reload. + // Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return. + if (!framebufferManager_.NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) { + // Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?) + // Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them + // entirely by walking a couple of pointers... + if (srcStride == dstStride && (u32)width == srcStride) { + // Common case in God of War, let's do it all in one chunk. 
+ u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp; + u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp; + const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr); + u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr); + memcpy(dst, src, width * height * bpp); + } else { + for (int y = 0; y < height; y++) { + u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp; + u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp; - textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT); + const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr); + u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr); + memcpy(dst, src, width * bpp); + } + } - // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to - // the backbuffer. Detect this and have the framebuffermanager draw the pixels. - - u32 backBuffer = framebufferManager_.PrevDisplayFramebufAddr(); - u32 displayBuffer = framebufferManager_.DisplayFramebufAddr(); - - if (((backBuffer != 0 && dstBasePtr == backBuffer) || - (displayBuffer != 0 && dstBasePtr == displayBuffer)) && - dstStride == 512 && height == 272) { - framebufferManager_.DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), GE_FORMAT_8888, 512, false); + textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT); + framebufferManager_.NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp); } + CBreakPoints::ExecMemCheck(srcBasePtr + (srcY * srcStride + srcX) * bpp, false, height * srcStride * bpp, currentMIPS->pc); + CBreakPoints::ExecMemCheck(dstBasePtr + (dstY * dstStride + dstX) * bpp, true, height * dstStride * bpp, currentMIPS->pc); + // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is
inaccurate. cyclesExecuted += ((height * width * bpp) * 16) / 10; } @@ -1858,32 +1893,103 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy } } +void DIRECTX9_GPU::PerformMemoryCopyInternal(u32 dest, u32 src, int size) { + if (!framebufferManager_.NotifyFramebufferCopy(src, dest, size)) { + // We use a little hack for Download/Upload using a VRAM mirror. + // Since they're identical we don't need to copy. + if (!Memory::IsVRAMAddress(dest) || (dest ^ 0x00400000) != src) { + Memory::Memcpy(dest, Memory::GetPointer(src), size); + } + } + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); +} + +void DIRECTX9_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) { + if (!framebufferManager_.NotifyFramebufferCopy(dest, dest, size, true)) { + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + } +} + +void DIRECTX9_GPU::PerformStencilUploadInternal(u32 dest, int size) { + framebufferManager_.NotifyStencilUpload(dest, size); +} + bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { + // Track stray copies of a framebuffer in RAM. MotoGP does this. + if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) { + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_MEMCPY); + ev.fb_memcpy.dst = dest; + ev.fb_memcpy.src = src; + ev.fb_memcpy.size = size; + ScheduleEvent(ev); + + // This is a memcpy, so we need to wait for it to complete. + SyncThread(); + } else { + PerformMemoryCopyInternal(dest, src, size); + } + return true; + } + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; } bool DIRECTX9_GPU::PerformMemorySet(u32 dest, u8 v, int size) { + // This may indicate a memset, usually to 0, of a framebuffer. 
+ if (framebufferManager_.MayIntersectFramebuffer(dest)) { + Memory::Memset(dest, v, size); + + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_MEMSET); + ev.fb_memset.dst = dest; + ev.fb_memset.v = v; + ev.fb_memset.size = size; + ScheduleEvent(ev); + + // We don't need to wait for the framebuffer to be updated. + } else { + PerformMemorySetInternal(dest, v, size); + } + return true; + } + + // Or perhaps a texture, let's invalidate. InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; } bool DIRECTX9_GPU::PerformMemoryDownload(u32 dest, int size) { - InvalidateCache(dest, size, GPU_INVALIDATE_HINT); - - // Track stray copies of a framebuffer in RAM. MotoGP does this. - if (Memory::IsRAMAddress(dest)) { -// framebufferManager_.NotifyFramebufferCopy(src, dest, size); + // Cheat a bit to force a download of the framebuffer. + // VRAM + 0x00400000 is simply a VRAM mirror. + if (Memory::IsVRAMAddress(dest)) { + return PerformMemoryCopy(dest ^ 0x00400000, dest, size); } return false; } bool DIRECTX9_GPU::PerformMemoryUpload(u32 dest, int size) { - InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + // Cheat a bit to force an upload of the framebuffer. + // VRAM + 0x00400000 is simply a VRAM mirror. 
+ if (Memory::IsVRAMAddress(dest)) { + return PerformMemoryCopy(dest, dest ^ 0x00400000, size); + } return false; } bool DIRECTX9_GPU::PerformStencilUpload(u32 dest, int size) { + if (framebufferManager_.MayIntersectFramebuffer(dest)) { + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_STENCIL_UPLOAD); + ev.fb_stencil_upload.dst = dest; + ev.fb_stencil_upload.size = size; + ScheduleEvent(ev); + } else { + PerformStencilUploadInternal(dest, size); + } + return true; + } return false; } diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index f44cd3629c..f53b24ecaa 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -158,6 +158,9 @@ private: void InitClearInternal(); void BeginFrameInternal(); void CopyDisplayToOutputInternal(); + void PerformMemoryCopyInternal(u32 dest, u32 src, int size); + void PerformMemorySetInternal(u32 dest, u8 v, int size); + void PerformStencilUploadInternal(u32 dest, int size); void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type); FramebufferManagerDX9 framebufferManager_; diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 66e7d554e5..8253a4f405 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -202,11 +202,13 @@ void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) { } // Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it -void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invert) { +static void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invertedX, bool invertedY, bool invertedZ) { Matrix4x4 s; Matrix4x4 t; - s.setScaling(Vec3(1, 1, invert ? -0.5 : 0.5f)); - t.setTranslation(Vec3(0, 0, 0.5f)); + s.setScaling(Vec3(1, 1, invertedZ ? -0.5 : 0.5f)); + float xoff = 0.5f / gstate_c.curRTRenderWidth; + float yoff = 0.5f / gstate_c.curRTRenderHeight; + t.setTranslation(Vec3(invertedX ? xoff : -xoff, invertedY ? 
-yoff : yoff, 0.5f)); in = in * s * t; } @@ -230,17 +232,20 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); - if (gstate_c.vpHeight < 0) { + + const bool invertedY = gstate_c.vpHeight < 0; + if (invertedY) { flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[13] = -flippedMatrix[13]; } - if (gstate_c.vpWidth < 0) { + const bool invertedX = gstate_c.vpWidth < 0; + if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[12] = -flippedMatrix[12]; } - bool invert = gstate_c.vpDepth < 0; - ConvertProjMatrixToD3D(flippedMatrix, invert); + const bool invertedZ = gstate_c.vpDepth < 0; + ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY, invertedZ); VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); } @@ -248,7 +253,7 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { Matrix4x4 proj_through; proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); - ConvertProjMatrixToD3D(proj_through, false); + ConvertProjMatrixToD3D(proj_through, false, false, false); VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); } diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index d082318b0d..62c2c20189 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -51,7 +51,7 @@ public: ~TextureCacheDX9(); void SetTexture(bool force = false); - bool SetOffsetTexture(u32 offset); + virtual bool SetOffsetTexture(u32 offset) override; void Clear(bool delete_them); void StartFrame(); diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index 66990d080b..d56c523124 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -97,8 +97,12 @@ LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo) { return fbo->tex; } -void fbo_bind_for_read(FBO *fbo) { - // pD3Ddevice->SetRenderTarget(0, fbo->surf); 
+LPDIRECT3DSURFACE9 fbo_get_color_for_read(FBO *fbo) { + return fbo->surf; +} + +LPDIRECT3DSURFACE9 fbo_get_color_for_write(FBO *fbo) { + return fbo->surf; } void fbo_bind_color_as_texture(FBO *fbo, int color) { @@ -110,4 +114,10 @@ void fbo_get_dimensions(FBO *fbo, int *w, int *h) { *h = fbo->height; } +HRESULT fbo_blit_color(FBO *src, const RECT *srcRect, FBO *dst, const RECT *dstRect, D3DTEXTUREFILTERTYPE filter) { + LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf; + LPDIRECT3DSURFACE9 dstSurf = dst ? dst->surf : deviceRTsurf; + return pD3Ddevice->StretchRect(srcSurf, srcRect, dstSurf, dstRect, filter); +} + } diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index 492629dfa9..0cdddea2ca 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -28,11 +28,13 @@ FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, F void fbo_bind_as_render_target(FBO *fbo); // color must be 0, for now. void fbo_bind_color_as_texture(FBO *fbo, int color); -void fbo_bind_for_read(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_color_for_read(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_color_for_write(FBO *fbo); void fbo_unbind(); void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); void fbo_resolve(FBO *fbo); +HRESULT fbo_blit_color(FBO *src, const RECT *srcRect, FBO *dst, const RECT *dstRect, D3DTEXTUREFILTERTYPE filter); LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 8b247bf456..b4d36d9199 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -30,7 +30,6 @@ #include "Core/Config.h" #include "Core/System.h" #include "Core/Reporting.h" -#include "Core/ELF/ParamSFO.h" #include "Core/HLE/sceDisplay.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" @@ -113,7 +112,7 @@ inline u16 BGRA8888toRGBA4444(u32 px) { return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 
0xF000); } -void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format); +void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -212,7 +211,8 @@ void FramebufferManager::SetNumExtraFBOs(int num) { FBO *fbo = fbo_create(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight, 1, false, FBO_8888); extraFBOs_.push_back(fbo); - // The new FBO is still bound after creation. + // The new FBO is still bound after creation, but let's bind it anyway. + fbo_bind_as_render_target(fbo); ClearBuffer(); } @@ -353,20 +353,9 @@ FramebufferManager::FramebufferManager() : } void FramebufferManager::Init() { + FramebufferManagerCommon::Init(); CompileDraw2DProgram(); - - const std::string gameId = g_paramSFO.GetValueString("DISC_ID"); - // This applies a hack to Dangan Ronpa, its demo, and its sequel. - // The game draws solid colors to a small framebuffer, and then reads this directly in VRAM. - // We force this framebuffer to 1x and force download it automatically. - hackForce04154000Download_ = gameId == "NPJH50631" || gameId == "NPJH50372" || gameId == "NPJH90164" || gameId == "NPJH50515"; - - // And an initial clear. We don't clear per frame as the games are supposed to handle that - // by themselves. 
- ClearBuffer(); - SetLineWidth(); - BeginFrame(); } FramebufferManager::~FramebufferManager() { @@ -498,6 +487,7 @@ void FramebufferManager::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); DisableState(); DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); + textureCache_->ForgetLastTexture(); } void FramebufferManager::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { @@ -720,9 +710,10 @@ void FramebufferManager::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h if (old.fbo) { INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); if (vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); ClearBuffer(); if (!g_Config.bDisableSlowFramebufEffects) { - BlitFramebuffer_(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); + BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); } } fbo_destroy(old.fbo); @@ -912,7 +903,7 @@ void FramebufferManager::BlitFramebufferDepth(VirtualFramebuffer *sourceframebuf } FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { - u32 key = ((u64)depth << 32) | (w << 16) | h; + u64 key = ((u64)depth << 32) | (w << 16) | h; auto it = tempFBOs_.find(key); if (it != tempFBOs_.end()) { it->second.last_frame_used = gpuStats.numFlips; @@ -923,6 +914,7 @@ FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { FBO *fbo = fbo_create(w, h, 1, false, depth); if (!fbo) return fbo; + fbo_bind_as_render_target(fbo); ClearBuffer(); const TempFBO info = {fbo, gpuStats.numFlips}; tempFBOs_[key] = info; @@ -951,7 +943,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer, b if (renderCopy) { VirtualFramebuffer copyInfo = 
*framebuffer; copyInfo.fbo = renderCopy; - BlitFramebuffer_(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false); + BlitFramebuffer(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false); RebindFramebuffer(); fbo_bind_color_as_texture(renderCopy, 0); @@ -1201,6 +1193,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s nvfb->last_frame_render = gpuStats.numFlips; bvfbs_.push_back(nvfb); + fbo_bind_as_render_target(nvfb->fbo); ClearBuffer(); glDisable(GL_DITHER); } else { @@ -1247,7 +1240,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s gameUsesSequentialCopies_ = true; } } - BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, true); + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, true); // PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels // PackFramebufferAsync_() - Asynchronous pixel data transfer using glReadPixels with PBOs @@ -1269,7 +1262,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s } // TODO: If dimensions are the same, we can use glCopyImageSubData. -void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { +void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. 
fbo_unbind(); @@ -1356,7 +1349,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) -void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format) { +void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. const u32 *src32 = (const u32 *)src; @@ -1367,20 +1360,19 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig } else if (UseBGRA8888()) { for (u32 y = 0; y < height; ++y) { ConvertBGRA8888ToRGBA8888(dst32, src32, width); - src32 += stride; - dst32 += stride; + src32 += srcStride; + dst32 += dstStride; } } else { // Here let's assume they don't intersect for (u32 y = 0; y < height; ++y) { memcpy(dst32, src32, width * 4); - src32 += stride; - dst32 += stride; + src32 += srcStride; + dst32 += dstStride; } } } else { // But here it shouldn't matter if they do intersect - int size = height * stride; u16 *dst16 = (u16 *)dst; switch (format) { case GE_FORMAT_565: // BGR 565 @@ -1389,16 +1381,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig for (u32 x = 0; x < width; ++x) { dst16[x] = BGRA8888toRGB565(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { for (u32 x = 0; x < width; ++x) { dst16[x] = RGBA8888toRGB565(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1406,14 +1398,14 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig if (UseBGRA8888()) { for (u32 y = 0; y < height; ++y) { ConvertBGRA8888ToRGBA5551(dst16, src32, width); - src32 += stride; - dst16 += stride; + src32 += srcStride; + 
dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { ConvertRGBA8888ToRGBA5551(dst16, src32, width); - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1423,16 +1415,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig for (u32 x = 0; x < width; ++x) { dst16[x] = BGRA8888toRGBA4444(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { for (u32 x = 0; x < width; ++x) { dst16[x] = RGBA8888toRGBA4444(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1507,7 +1499,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) { if (useCPU || (UseBGRA8888() && pbo.format == GE_FORMAT_8888)) { u8 *dst = Memory::GetPointer(pbo.fb_address); - ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.height, pbo.format); + ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.stride, pbo.height, pbo.format); } else { // We don't need to convert, GPU already did (or should have) Memory::Memcpy(pbo.fb_address, packed, pbo.size); @@ -1671,7 +1663,7 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, in if (convert) { int dstByteOffset = y * vfb->fb_stride * dstBpp; - ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->width, h, vfb->format); + ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->fb_stride, vfb->width, h, vfb->format); } } @@ -1803,224 +1795,6 @@ void FramebufferManager::DestroyAllFBOs() { DisableState(); } -void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) { - addr &= ~0x40000000; - // TODO: Could go through all FBOs, but probably not important? - // TODO: Could also check for inner changes, but video is most important. 
- bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); - if (isDisplayBuf || safe) { - // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. - if (!Memory::IsValidAddress(displayFramebufPtr_)) - return; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { - FlushBeforeCopy(); - - if (useBufferedRendering_ && vfb->fbo) { - DisableState(); - GEBufferFormat fmt = vfb->format; - if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) { - // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. - fmt = displayFormat_; - } - DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); - SetColorUpdated(vfb); - } else { - INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i--); - } - } - } - - RebindFramebuffer(); - } -} - -bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) { - if (updateVRAM_ || size == 0) { - return false; - } - - dst &= 0x3FFFFFFF; - src &= 0x3FFFFFFF; - - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; - u32 dstY = (u32)-1; - u32 dstH = 0; - u32 srcY = (u32)-1; - u32 srcH = 0; - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; - const u32 vfb_size = FramebufferByteSize(vfb); - const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 
4 : 2; - const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; - const int vfb_byteWidth = vfb->width * vfb_bpp; - - if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { - const u32 offset = dst - vfb_address; - const u32 yOffset = offset / vfb_byteStride; - if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) { - dstBuffer = vfb; - dstY = yOffset; - dstH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); - } - } - - if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { - const u32 offset = src - vfb_address; - const u32 yOffset = offset / vfb_byteStride; - if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) { - srcBuffer = vfb; - srcY = yOffset; - srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); - } - } - } - - if (srcBuffer && srcY == 0 && srcH == srcBuffer->height && !dstBuffer) { - // MotoGP workaround - it copies a framebuffer to memory and then displays it. - // TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing. - // Unfortunately, that would force 1x render resolution. - if (Memory::IsRAMAddress(dst)) { - knownFramebufferRAMCopies_.insert(std::pair(src, dst)); - } - } - - if (!useBufferedRendering_) { - // If we're copying into a recently used display buf, it's probably destined for the screen. - if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) { - return false; - } - } - - if (dstBuffer && srcBuffer && !isMemset) { - if (srcBuffer == dstBuffer) { - WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); - } else { - WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); - // Just do the blit! 
- if (g_Config.bBlockTransferGPU) { - BlitFramebuffer_(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); - SetColorUpdated(dstBuffer); - RebindFramebuffer(); - } - } - return false; - } else if (dstBuffer) { - WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - const u8 *srcBase = Memory::GetPointerUnchecked(src); - DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); - SetColorUpdated(dstBuffer); - RebindFramebuffer(); - textureCache_->ForgetLastTexture(); - // This is a memcpy, let's still copy just in case. - return false; - } - return false; - } else if (srcBuffer) { - WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); - FlushBeforeCopy(); - if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { - WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); - } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { - ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH); - } - return false; - } else { - return false; - } -} - -void FramebufferManager::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const { - u32 dstYOffset = -1; - u32 dstXOffset = -1; - u32 srcYOffset = -1; - u32 srcXOffset = -1; - int width = srcWidth; - int height = srcHeight; - - dstBasePtr &= 0x3FFFFFFF; - srcBasePtr &= 0x3FFFFFFF; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; - const u32 vfb_size = FramebufferByteSize(vfb); - const u32 vfb_bpp = vfb->format == 
GE_FORMAT_8888 ? 4 : 2; - const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; - const u32 vfb_byteWidth = vfb->width * vfb_bpp; - - // These heuristics are a bit annoying. - // The goal is to avoid using GPU block transfers for things that ought to be memory. - // Maybe we should even check for textures at these places instead? - - if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) { - const u32 byteOffset = dstBasePtr - vfb_address; - const u32 byteStride = dstStride * bpp; - const u32 yOffset = byteOffset / byteStride; - // Some games use mismatching bitdepths. But make sure the stride matches. - // If it doesn't, generally this means we detected the framebuffer with too large a height. - bool match = yOffset < dstYOffset; - if (match && vfb_byteStride != byteStride) { - // Grand Knights History copies with a mismatching stride but a full line at a time. - // Makes it hard to detect the wrong transfers in e.g. God of War. - if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { - match = false; - } else { - dstWidth = byteStride * height / vfb_bpp; - dstHeight = 1; - } - } else if (match) { - dstWidth = width; - dstHeight = height; - } - if (match) { - dstYOffset = yOffset; - dstXOffset = (byteOffset / bpp) % dstStride; - dstBuffer = vfb; - } - } - if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) { - const u32 byteOffset = srcBasePtr - vfb_address; - const u32 byteStride = srcStride * bpp; - const u32 yOffset = byteOffset / byteStride; - bool match = yOffset < srcYOffset; - if (match && vfb_byteStride != byteStride) { - if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { - match = false; - } else { - srcWidth = byteStride * height / vfb_bpp; - srcHeight = 1; - } - } else if (match) { - srcWidth = width; - srcHeight = height; - } - if (match) { - srcYOffset = yOffset; - srcXOffset = (byteOffset / bpp) % srcStride; 
- srcBuffer = vfb; - } - } - } - - if (dstYOffset != (u32)-1) { - dstY += dstYOffset; - dstX += dstXOffset; - } - if (srcYOffset != (u32)-1) { - srcY += srcYOffset; - srcX += srcXOffset; - } -} - void FramebufferManager::FlushBeforeCopy() { // Flush anything not yet drawn before blitting, downloading, or uploading. // This might be a stalled list, or unflushed before a block transfer, etc. @@ -2028,118 +1802,6 @@ void FramebufferManager::FlushBeforeCopy() { transformDraw_->Flush(); } -bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { - if (!useBufferedRendering_ || updateVRAM_) { - return false; - } - - // Skip checking if there's no framebuffers in that area. - if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) { - return false; - } - - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; - int srcWidth = width; - int srcHeight = height; - int dstWidth = width; - int dstHeight = height; - FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); - - if (dstBuffer && srcBuffer) { - if (srcBuffer == dstBuffer) { - if (srcX != dstX || srcY != dstY) { - WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); - RebindFramebuffer(); - SetColorUpdated(dstBuffer); - return true; - } - } else { - // Ignore, nothing to do. Tales of Phantasia X does this by accident. - if (g_Config.bBlockTransferGPU) { - return true; - } - } - } else { - WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); - // Just do the blit! 
- if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); - RebindFramebuffer(); - SetColorUpdated(dstBuffer); - return true; // No need to actually do the memory copy behind, probably. - } - } - return false; - } else if (dstBuffer) { - // Here we should just draw the pixels into the buffer. Copy first. - return false; - } else if (srcBuffer) { - WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr); - FlushBeforeCopy(); - if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { - const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2; - const float srcXFactor = (float)bpp / srcBpp; - if (srcHeight <= 0 || srcY + srcHeight > srcBuffer->bufferHeight) { - WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); - } else { - ReadFramebufferToMemory(srcBuffer, true, srcX * srcXFactor, srcY, srcWidth * srcXFactor, srcHeight); - } - } - return false; // Let the bit copy happen - } else { - return false; - } -} - -void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { - // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to - // the backbuffer. Detect this and have the framebuffermanager draw the pixels. - - u32 backBuffer = PrevDisplayFramebufAddr(); - u32 displayBuffer = DisplayFramebufAddr(); - - // TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy? 
- if (((backBuffer != 0 && dstBasePtr == backBuffer) || - (displayBuffer != 0 && dstBasePtr == displayBuffer)) && - dstStride == 512 && height == 272 && !useBufferedRendering_) { - FlushBeforeCopy(); - DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false); - } - - if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) { - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; - int srcWidth = width; - int srcHeight = height; - int dstWidth = width; - int dstHeight = height; - FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); - - if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) { - return; - } - - if (dstBuffer && !srcBuffer) { - WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr); - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp; - int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 
4 : 2; - float dstXFactor = (float)bpp / dstBpp; - DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, dstWidth * dstXFactor, dstHeight); - SetColorUpdated(dstBuffer); - RebindFramebuffer(); - textureCache_->ForgetLastTexture(); - } - } - } -} - void FramebufferManager::Resized() { resized_ = true; } diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index ef28e90541..ec17e246bc 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -71,10 +71,9 @@ public: transformDraw_ = td; } - void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - - void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader); + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) override; // If texture != 0, will bind it. // x,y,w,h are relative to destW, destH which fill out the target completely. @@ -84,12 +83,11 @@ public: void DestroyAllFBOs(); - void Init(); + virtual void Init() override; void EndFrame(); void Resized(); void DeviceLost(); void CopyDisplayToOutput(); - void UpdateFromMemory(u32 addr, int size, bool safe); void SetLineWidth(); void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); @@ -98,18 +96,11 @@ public: // For use when texturing from a framebuffer. May create a duplicate if target. 
void BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy = false); - // Returns true if it's sure this is a direct FBO->FBO transfer and it has already handle it. - // In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless - // read framebuffers is on, in which case this should always return false). - bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); - void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); - // Reads a rectangular subregion of a framebuffer to the right position in its backing memory. - void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h); + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; std::vector GetFramebufferList(); - bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); void DestroyFramebuf(VirtualFramebuffer *vfb); @@ -119,7 +110,7 @@ public: bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); - void RebindFramebuffer(); + virtual void RebindFramebuffer() override; FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888); @@ -127,26 +118,24 @@ protected: virtual void DisableState() override; virtual void ClearBuffer() override; virtual void ClearDepthBuffer() override; + virtual void FlushBeforeCopy() override; + virtual void DecimateFBOs() override; + + // Used by ReadFramebufferToMemory and later framebuffer block copies + virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) override; virtual void 
NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; - virtual void DecimateFBOs() override; - private: void CompileDraw2DProgram(); void DestroyDraw2DProgram(); - void FlushBeforeCopy(); - - void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const; void SetNumExtraFBOs(int num); inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const; - // Used by ReadFramebufferToMemory and later framebuffer block copies - void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false); #ifndef USING_GLES2 void PackFramebufferAsync_(VirtualFramebuffer *vfb); #endif @@ -187,8 +176,6 @@ private: std::vector<VirtualFramebuffer *> bvfbs_; // blitting framebuffers (for download) std::map<u64, TempFBO> tempFBOs_; - std::set<std::pair<u32, u32>> knownFramebufferRAMCopies_; - #ifndef USING_GLES2 AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_; diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 6028f4c72a..6b9173718c 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -58,7 +58,7 @@ public: ~TextureCache(); void SetTexture(bool force = false); - bool SetOffsetTexture(u32 offset) override; + virtual bool SetOffsetTexture(u32 offset) override; void Clear(bool delete_them); void StartFrame();