From 6f991a10427f4294aba153114675f58753f5eba4 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 14:23:18 -0700 Subject: [PATCH 01/33] d3d: Add FlushBeforeCopy(). --- GPU/Common/FramebufferCommon.h | 11 ++++++----- GPU/Directx9/FramebufferDX9.cpp | 13 ++++++++++--- GPU/Directx9/FramebufferDX9.h | 13 ++++++++----- GPU/Directx9/GPU_DX9.cpp | 1 + GPU/GLES/Framebuffer.h | 5 ++--- 5 files changed, 27 insertions(+), 16 deletions(-) diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 5dfb24f145..a6e055d29d 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -159,11 +159,16 @@ public: GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->format : displayFormat_; } protected: + virtual void DisableState() = 0; + virtual void ClearBuffer() = 0; + virtual void ClearDepthBuffer() = 0; + virtual void FlushBeforeCopy() = 0; + virtual void DecimateFBOs() = 0; + void EstimateDrawingSize(int &drawing_width, int &drawing_height); u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const; static bool MaskedEqual(u32 addr1, u32 addr2); - virtual void DecimateFBOs() = 0; virtual void DestroyFramebuf(VirtualFramebuffer *vfb) = 0; virtual void ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force = false) = 0; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) = 0; @@ -182,10 +187,6 @@ protected: dstBuffer->reallyDirtyAfterDisplay = true; } - virtual void DisableState() = 0; - virtual void ClearBuffer() = 0; - virtual void ClearDepthBuffer() = 0; - u32 displayFramebufPtr_; u32 displayStride_; GEBufferFormat displayFormat_; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 6021d29f74..5245c40231 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -29,8 +29,9 @@ #include "GPU/Common/FramebufferCommon.h" #include "GPU/Directx9/FramebufferDX9.h" -#include 
"GPU/Directx9/TextureCacheDX9.h" #include "GPU/Directx9/ShaderManagerDX9.h" +#include "GPU/Directx9/TextureCacheDX9.h" +#include "GPU/Directx9/TransformPipelineDX9.h" #include @@ -986,8 +987,7 @@ namespace DX9 { for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; if (MaskedEqual(vfb->fb_address, addr)) { - // TODO - //FlushBeforeCopy(); + FlushBeforeCopy(); if (useBufferedRendering_ && vfb->fbo) { DisableState(); @@ -1012,6 +1012,13 @@ namespace DX9 { } } + void FramebufferManagerDX9::FlushBeforeCopy() { + // Flush anything not yet drawn before blitting, downloading, or uploading. + // This might be a stalled list, or unflushed before a block transfer, etc. + SetRenderFrameBuffer(); + transformDraw_->Flush(); + } + void FramebufferManagerDX9::Resized() { resized_ = true; } diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 45c1a0628c..cdc7227be3 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -34,15 +34,14 @@ namespace DX9 { -struct GLSLProgram; class TextureCacheDX9; +class TransformDrawEngineDX9; +class ShaderManagerDX9; void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH); -class ShaderManagerDX9; - class FramebufferManagerDX9 : public FramebufferManagerCommon { public: FramebufferManagerDX9(); @@ -54,6 +53,9 @@ public: void SetShaderManager(ShaderManagerDX9 *sm) { shaderManager_ = sm; } + void SetTransformDrawEngine(TransformDrawEngineDX9 *td) { + transformDraw_ = td; + } void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); @@ -88,13 +90,13 @@ protected: virtual void DisableState() override; virtual void ClearBuffer() override; virtual void ClearDepthBuffer() override; + virtual void FlushBeforeCopy() override; + virtual void DecimateFBOs() override; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void 
NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; - virtual void DecimateFBOs() override; - private: void CompileDraw2DProgram(); void DestroyDraw2DProgram(); @@ -115,6 +117,7 @@ private: TextureCacheDX9 *textureCache_; ShaderManagerDX9 *shaderManager_; + TransformDrawEngineDX9 *transformDraw_; bool usePostShader_; bool postShaderAtOutputResolution_; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 09b43e8242..26c0176541 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -398,6 +398,7 @@ DIRECTX9_GPU::DIRECTX9_GPU() transformDraw_.SetFramebufferManager(&framebufferManager_); framebufferManager_.SetTextureCache(&textureCache_); framebufferManager_.SetShaderManager(shaderManager_); + framebufferManager_.SetTransformDrawEngine(&transformDraw_); textureCache_.SetFramebufferManager(&framebufferManager_); textureCache_.SetShaderManager(shaderManager_); diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index ef28e90541..a4432952b2 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -127,17 +127,16 @@ protected: virtual void DisableState() override; virtual void ClearBuffer() override; virtual void ClearDepthBuffer() override; + virtual void FlushBeforeCopy() override; + virtual void DecimateFBOs() override; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; - virtual void DecimateFBOs() override; - private: void CompileDraw2DProgram(); void DestroyDraw2DProgram(); - void FlushBeforeCopy(); void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, 
int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const; From 9af3befc596eac2ba3cd65d8495e201b99f815e8 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 14:26:39 -0700 Subject: [PATCH 02/33] d3d: Add RebindFramebuffer(). --- GPU/Common/FramebufferCommon.h | 1 + GPU/Directx9/FramebufferDX9.cpp | 13 ++++++++++--- GPU/Directx9/FramebufferDX9.h | 2 ++ GPU/GLES/Framebuffer.h | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index a6e055d29d..a494d7450d 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -110,6 +110,7 @@ public: } DoSetRenderFrameBuffer(); } + virtual void RebindFramebuffer() = 0; size_t NumVFBs() const { return vfbs_.size(); } diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 5245c40231..f230016048 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -331,6 +331,14 @@ namespace DX9 { delete v; } + void FramebufferManagerDX9::RebindFramebuffer() { + if (currentRenderVfb_ && currentRenderVfb_->fbo) { + fbo_bind_as_render_target(currentRenderVfb_->fbo); + } else { + fbo_unbind(); + } + } + void FramebufferManagerDX9::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force) { float renderWidthFactor = (float)vfb->renderWidth / (float)vfb->bufferWidth; float renderHeightFactor = (float)vfb->renderHeight / (float)vfb->bufferHeight; @@ -750,6 +758,7 @@ namespace DX9 { } #endif #endif + RebindFramebuffer(); } } @@ -1006,9 +1015,7 @@ namespace DX9 { } } - // TODO: RebindFramebuffer(); - fbo_unbind(); - currentRenderVfb_ = 0; + RebindFramebuffer(); } } diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index cdc7227be3..0991871006 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -86,6 +86,8 @@ public: bool 
GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); + virtual void RebindFramebuffer() override; + protected: virtual void DisableState() override; virtual void ClearBuffer() override; diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index a4432952b2..16bba3c6db 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -119,7 +119,7 @@ public: bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); - void RebindFramebuffer(); + virtual void RebindFramebuffer() override; FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888); From 3dfdddfc5ffd2cc3d1dec84f4427dfb148f10144 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 14:44:18 -0700 Subject: [PATCH 03/33] d3d: Move UpdateFromMemory() to common. --- GPU/Common/FramebufferCommon.cpp | 36 ++++++++++++++++++++++++++++++++ GPU/Common/FramebufferCommon.h | 36 ++++++++++++++++++-------------- GPU/Directx9/FramebufferDX9.cpp | 36 -------------------------------- GPU/Directx9/FramebufferDX9.h | 8 +++---- GPU/GLES/Framebuffer.cpp | 36 -------------------------------- GPU/GLES/Framebuffer.h | 8 +++---- 6 files changed, 62 insertions(+), 98 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index a5a7d81fc3..1d9cfc87cc 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -387,3 +387,39 @@ void FramebufferManagerCommon::DoSetRenderFrameBuffer() { gstate_c.curRTRenderWidth = vfb->renderWidth; gstate_c.curRTRenderHeight = vfb->renderHeight; } + +void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { + addr &= ~0x40000000; + // TODO: Could go through all FBOs, but probably not important? + // TODO: Could also check for inner changes, but video is most important. 
+ bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); + if (isDisplayBuf || safe) { + // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. + if (!Memory::IsValidAddress(displayFramebufPtr_)) + return; + + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + if (MaskedEqual(vfb->fb_address, addr)) { + FlushBeforeCopy(); + + if (useBufferedRendering_ && vfb->fbo) { + DisableState(); + GEBufferFormat fmt = vfb->format; + if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) { + // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. + fmt = displayFormat_; + } + DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); + SetColorUpdated(vfb); + } else { + INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); + DestroyFramebuf(vfb); + vfbs_.erase(vfbs_.begin() + i--); + } + } + } + + RebindFramebuffer(); + } +} diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index a494d7450d..2d7b6ed24b 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -94,9 +94,7 @@ public: virtual ~FramebufferManagerCommon(); void BeginFrame(); - - virtual bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false) = 0; - virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0; + void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); void DoSetRenderFrameBuffer(); void SetRenderFrameBuffer() { @@ -112,9 +110,15 @@ public: } virtual void RebindFramebuffer() = 0; - size_t NumVFBs() const { return vfbs_.size(); } + void UpdateFromMemory(u32 addr, int size, bool safe); + virtual bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false) = 0; + virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero 
= false) = 0; - void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; + virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; + virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0; + + size_t NumVFBs() const { return vfbs_.size(); } u32 PrevDisplayFramebufAddr() { return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0; @@ -123,17 +127,6 @@ public: return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0; } - void SetDepthUpdated() { - if (currentRenderVfb_) { - currentRenderVfb_->depthUpdated = true; - } - } - void SetColorUpdated() { - if (currentRenderVfb_) { - SetColorUpdated(currentRenderVfb_); - } - } - bool MayIntersectFramebuffer(u32 start) { // Clear the cache/kernel bits. start = start & 0x3FFFFFFF; @@ -159,6 +152,17 @@ public: int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; } GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? 
currentRenderVfb_->format : displayFormat_; } + void SetDepthUpdated() { + if (currentRenderVfb_) { + currentRenderVfb_->depthUpdated = true; + } + } + void SetColorUpdated() { + if (currentRenderVfb_) { + SetColorUpdated(currentRenderVfb_); + } + } + protected: virtual void DisableState() = 0; virtual void ClearBuffer() = 0; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index f230016048..f9b904d643 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -983,42 +983,6 @@ namespace DX9 { vfbs_.clear(); } - void FramebufferManagerDX9::UpdateFromMemory(u32 addr, int size, bool safe) { - addr &= ~0x40000000; - // TODO: Could go through all FBOs, but probably not important? - // TODO: Could also check for inner changes, but video is most important. - bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); - if (isDisplayBuf || safe) { - // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. - if (!Memory::IsValidAddress(displayFramebufPtr_)) - return; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { - FlushBeforeCopy(); - - if (useBufferedRendering_ && vfb->fbo) { - DisableState(); - GEBufferFormat fmt = vfb->format; - if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) { - // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. 
- fmt = displayFormat_; - } - DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); - SetColorUpdated(vfb); - } else { - INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i--); - } - } - } - - RebindFramebuffer(); - } - } - void FramebufferManagerDX9::FlushBeforeCopy() { // Flush anything not yet drawn before blitting, downloading, or uploading. // This might be a stalled list, or unflushed before a block transfer, etc. diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 0991871006..de56082a2b 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -57,10 +57,9 @@ public: transformDraw_ = td; } - void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - - void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader); + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) override; void DrawActiveTexture(LPDIRECT3DTEXTURE9 texture, float x, float y, float w, float h, float destW, float destH, bool flip = false, float u0 = 0.0f, float v0 = 0.0f, float u1 = 1.0f, float v1 = 1.0f); @@ -70,7 +69,6 @@ public: void Resized(); void DeviceLost(); void CopyDisplayToOutput(); - void UpdateFromMemory(u32 addr, int size, bool safe); void 
ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync = true); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 8b247bf456..a0b24ff213 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1803,42 +1803,6 @@ void FramebufferManager::DestroyAllFBOs() { DisableState(); } -void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) { - addr &= ~0x40000000; - // TODO: Could go through all FBOs, but probably not important? - // TODO: Could also check for inner changes, but video is most important. - bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); - if (isDisplayBuf || safe) { - // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. - if (!Memory::IsValidAddress(displayFramebufPtr_)) - return; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { - FlushBeforeCopy(); - - if (useBufferedRendering_ && vfb->fbo) { - DisableState(); - GEBufferFormat fmt = vfb->format; - if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) { - // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. 
- fmt = displayFormat_; - } - DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); - SetColorUpdated(vfb); - } else { - INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i--); - } - } - } - - RebindFramebuffer(); - } -} - bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) { if (updateVRAM_ || size == 0) { return false; diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 16bba3c6db..23900d08df 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -71,10 +71,9 @@ public: transformDraw_ = td; } - void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - - void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); - void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader); + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override; + virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) override; // If texture != 0, will bind it. // x,y,w,h are relative to destW, destH which fill out the target completely. @@ -89,7 +88,6 @@ public: void Resized(); void DeviceLost(); void CopyDisplayToOutput(); - void UpdateFromMemory(u32 addr, int size, bool safe); void SetLineWidth(); void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); From b79062339bbe458b07c7a32a8774eb4aca3c2dc0 Mon Sep 17 00:00:00 2001 From: "Unknown W. 
Brackets" Date: Sat, 13 Sep 2014 14:53:14 -0700 Subject: [PATCH 04/33] d3d: Fix colors in UpdateFromMemory(). --- GPU/Directx9/FramebufferDX9.cpp | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index f9b904d643..9d75f1320d 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -151,8 +151,12 @@ namespace DX9 { *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x001f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; } - static inline u32 ABGR2RGBA(u32 src) { - return (src >> 8) | (src << 24); + // TODO: Swizzle the texture access instead. + static inline u32 RGBA2BGRA(u32 src) { + const u32 r = (src & 0x000000FF) << 16; + const u32 ga = src & 0xFF00FF00; + const u32 b = (src & 0x00FF0000) >> 16; + return r | ga | b; } void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) { @@ -168,7 +172,7 @@ namespace DX9 { convBuf = (u8*)rect.pBits; - // Final format is ARGB(directx) + // Final format is BGRA(directx) // TODO: We can just change the texture format and flip some bits around instead of this. 
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) { @@ -177,8 +181,8 @@ namespace DX9 { // not tested case GE_FORMAT_565: { - const u16 *src = (const u16 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); + const u16_le *src = (const u16_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); for (int x = 0; x < 480; x++) { u16_le col0 = src[x+0]; ARGB8From565(col0, &dst[x + 0]); @@ -188,8 +192,8 @@ namespace DX9 { // faster case GE_FORMAT_5551: { - const u16 *src = (const u16 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); + const u16_le *src = (const u16_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); for (int x = 0; x < 480; x++) { u16_le col0 = src[x+0]; ARGB8From5551(col0, &dst[x + 0]); @@ -199,8 +203,8 @@ namespace DX9 { // not tested case GE_FORMAT_4444: { - const u16 *src = (const u16 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); + const u16_le *src = (const u16_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); for (int x = 0; x < 480; x++) { u16_le col = src[x]; @@ -214,11 +218,11 @@ namespace DX9 { case GE_FORMAT_8888: { - const u32 *src = (const u32 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); + const u32_le *src = (const u32_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); for (int x = 0; x < 480; x++) { - dst[x] = ABGR2RGBA(src[x]); + dst[x] = RGBA2BGRA(src[x]); } } break; @@ -226,11 +230,11 @@ namespace DX9 { } } else { for (int y = 0; y < 272; y++) { - const u32 *src = (const u32 *)srcPixels + srcStride * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); + const u32_le *src = (const u32_le *)srcPixels + srcStride * y; + u32 *dst = (u32 *)(convBuf + rect.Pitch * y); for (int x = 0; x < 512; x++) { - dst[x] = ABGR2RGBA(src[x]); + dst[x] = RGBA2BGRA(src[x]); } } } From 96b497f95566ceed94254567e67d786011a6ad03 Mon Sep 
17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 14:54:53 -0700 Subject: [PATCH 05/33] d3d: Support arbitrary size framebuf uploads. --- GPU/Directx9/FramebufferDX9.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 9d75f1320d..47f1759165 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -176,14 +176,14 @@ namespace DX9 { // TODO: We can just change the texture format and flip some bits around instead of this. if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) { - for (int y = 0; y < 272; y++) { + for (int y = 0; y < height; y++) { switch (srcPixelFormat) { // not tested case GE_FORMAT_565: { const u16_le *src = (const u16_le *)srcPixels + srcStride * y; u32 *dst = (u32 *)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) { + for (int x = 0; x < width; x++) { u16_le col0 = src[x+0]; ARGB8From565(col0, &dst[x + 0]); } @@ -194,7 +194,7 @@ namespace DX9 { { const u16_le *src = (const u16_le *)srcPixels + srcStride * y; u32 *dst = (u32 *)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) { + for (int x = 0; x < width; x++) { u16_le col0 = src[x+0]; ARGB8From5551(col0, &dst[x + 0]); } @@ -205,7 +205,7 @@ namespace DX9 { { const u16_le *src = (const u16_le *)srcPixels + srcStride * y; u32 *dst = (u32 *)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) + for (int x = 0; x < width; x++) { u16_le col = src[x]; dst[x * 4 + 0] = (col >> 12) << 4; @@ -220,7 +220,7 @@ namespace DX9 { { const u32_le *src = (const u32_le *)srcPixels + srcStride * y; u32 *dst = (u32 *)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) + for (int x = 0; x < width; x++) { dst[x] = RGBA2BGRA(src[x]); } @@ -229,10 +229,10 @@ namespace DX9 { } } } else { - for (int y = 0; y < 272; y++) { + for (int y = 0; y < height; y++) { const u32_le *src = (const u32_le *)srcPixels + srcStride * y; u32 *dst = (u32 
*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 512; x++) + for (int x = 0; x < width; x++) { dst[x] = RGBA2BGRA(src[x]); } From f33515723b0359dcb953c5311902264f00b48cdd Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 14:57:45 -0700 Subject: [PATCH 06/33] Move ForgetLastTexture() into DrawPixels(). --- GPU/Directx9/FramebufferDX9.cpp | 7 ++----- GPU/GLES/Framebuffer.cpp | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 47f1759165..f43f415bec 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -250,7 +250,8 @@ namespace DX9 { dxstate.viewport.set(0, 0, vfb->renderWidth, vfb->renderHeight); MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); DisableState(); - DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); + DrawActiveTexture(drawPixelsTex_, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); + textureCache_->ForgetLastTexture(); } void FramebufferManagerDX9::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { @@ -286,10 +287,6 @@ namespace DX9 { } // TODO: StretchRect instead? 
- if (tex) { - pD3Ddevice->SetTexture(0, tex); - } - float coord[20] = { x,y,0, u0,v0, x+w,y,0, u1,v0, diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index a0b24ff213..68fca12634 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -498,6 +498,7 @@ void FramebufferManager::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); DisableState(); DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); + textureCache_->ForgetLastTexture(); } void FramebufferManager::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { @@ -1883,7 +1884,6 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); SetColorUpdated(dstBuffer); RebindFramebuffer(); - textureCache_->ForgetLastTexture(); // This is a memcpy, let's still copy just in case. return false; } @@ -2098,7 +2098,6 @@ void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, dstWidth * dstXFactor, dstHeight); SetColorUpdated(dstBuffer); RebindFramebuffer(); - textureCache_->ForgetLastTexture(); } } } From a4d33d77b7907e30966157c20b9a5b2c7c528056 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 15:09:30 -0700 Subject: [PATCH 07/33] d3d: Unify ReadFramebufferToMemory() signature. 
--- GPU/Common/FramebufferCommon.h | 1 + GPU/Directx9/FramebufferDX9.cpp | 47 ++++++++++++++++++++++++++------- GPU/Directx9/FramebufferDX9.h | 3 ++- GPU/GLES/Framebuffer.h | 2 +- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 2d7b6ed24b..77f3f62741 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -114,6 +114,7 @@ public: virtual bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false) = 0; virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0; + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0; virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index f43f415bec..787e65f819 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -114,7 +114,8 @@ namespace DX9 { FramebufferManagerDX9::FramebufferManagerDX9() : drawPixelsTex_(0), drawPixelsTexFormat_(GE_FORMAT_INVALID), - convBuf(0) + convBuf(0), + gameUsesSequentialCopies_(false) { // And an initial clear. We don't clear per frame as the games are supposed to handle that // by themselves. 
@@ -436,8 +437,7 @@ namespace DX9 { void FramebufferManagerDX9::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) { if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) { - // TODO - //ReadFramebufferToMemory(vfb, true, 0, 0, vfb->width, vfb->height); + ReadFramebufferToMemory(vfb, true, 0, 0, vfb->width, vfb->height); } textureCache_->ForgetLastTexture(); @@ -650,14 +650,14 @@ namespace DX9 { } } - void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync) { + void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { #if 0 if (sync) { PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update } #endif - if(vfb) { + if (vfb) { // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. // For now we'll keep these on the same struct as the ones that can get displayed // (and blatantly copy work already done above while at it). @@ -692,6 +692,9 @@ namespace DX9 { nvfb->bufferWidth = vfb->bufferWidth; nvfb->bufferHeight = vfb->bufferHeight; nvfb->format = vfb->format; + nvfb->drawnWidth = vfb->drawnWidth; + nvfb->drawnHeight = vfb->drawnHeight; + nvfb->drawnFormat = vfb->format; nvfb->usageFlags = FB_USAGE_RENDERTARGET; nvfb->dirtyAfterDisplay = true; @@ -712,6 +715,7 @@ namespace DX9 { break; } + textureCache_->ForgetLastTexture(); nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth); if (!(nvfb->fbo)) { ERROR_LOG(SCEGE, "Error creating FBO! 
%i x %i", nvfb->renderWidth, nvfb->renderHeight); @@ -720,7 +724,6 @@ namespace DX9 { nvfb->last_frame_render = gpuStats.numFlips; bvfbs_.push_back(nvfb); - fbo_bind_as_render_target(nvfb->fbo); ClearBuffer(); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; @@ -743,9 +746,32 @@ namespace DX9 { #endif } - vfb->memoryUpdated = true; - BlitFramebuffer_(nvfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, false); + if (gameUsesSequentialCopies_) { + // Ignore the x/y/etc., read the entire thing. + x = 0; + y = 0; + w = vfb->width; + h = vfb->height; + } + if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { + vfb->memoryUpdated = true; + } else { + const static int FREQUENT_SEQUENTIAL_COPIES = 3; + static int frameLastCopy = 0; + static u32 bufferLastCopy = 0; + static int copiesThisFrame = 0; + if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) { + frameLastCopy = gpuStats.numFlips; + bufferLastCopy = vfb->fb_address; + copiesThisFrame = 0; + } + if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) { + gameUsesSequentialCopies_ = true; + } + } + BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, false); + // TODO: Actually do it. 
#if 0 #ifdef USING_GLES2 PackFramebufferSync_(nvfb); // synchronous glReadPixels @@ -943,8 +969,9 @@ namespace DX9 { VirtualFramebuffer *vfb = vfbs_[i]; int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used); - if (updateVram && age == 0 && !vfb->memoryUpdated && vfb == displayFramebuf_) - ReadFramebufferToMemory(vfb); + if (ShouldDownloadFramebuffer(vfb) && age == 0 && !vfb->memoryUpdated) { + ReadFramebufferToMemory(vfb, false, 0, 0, vfb->width, vfb->height); + } if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { continue; diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index de56082a2b..dabdea62d0 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -70,7 +70,7 @@ public: void DeviceLost(); void CopyDisplayToOutput(); - void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync = true); + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; std::vector GetFramebufferList(); @@ -125,6 +125,7 @@ private: std::vector extraFBOs_; bool resized_; + bool gameUsesSequentialCopies_; std::vector bvfbs_; // blitting FBOs diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 23900d08df..bd87084825 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -103,7 +103,7 @@ public: void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); // Reads a rectangular subregion of a framebuffer to the right position in its backing memory. - void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h); + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; std::vector GetFramebufferList(); From 971dd5df1e0b16543f09335216cd9bdcc1012e92 Mon Sep 17 00:00:00 2001 From: "Unknown W. 
Brackets" Date: Sat, 13 Sep 2014 15:12:06 -0700 Subject: [PATCH 08/33] d3d: Unify BlitFramebuffer() as well. --- GPU/Common/FramebufferCommon.h | 3 +++ GPU/Directx9/FramebufferDX9.cpp | 8 +++++--- GPU/Directx9/FramebufferDX9.h | 5 +++-- GPU/GLES/Framebuffer.cpp | 14 +++++++------- GPU/GLES/Framebuffer.h | 5 +++-- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 77f3f62741..623f464947 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -171,6 +171,9 @@ protected: virtual void FlushBeforeCopy() = 0; virtual void DecimateFBOs() = 0; + // Used by ReadFramebufferToMemory and later framebuffer block copies + virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) = 0; + void EstimateDrawingSize(int &drawing_width, int &drawing_height); u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const; static bool MaskedEqual(u32 addr1, u32 addr2); diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 787e65f819..9a66fadacb 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -404,7 +404,7 @@ namespace DX9 { if (vfb->fbo) { ClearBuffer(); if (!g_Config.bDisableSlowFramebufEffects) { - BlitFramebuffer_(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); + BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); } } fbo_destroy(old.fbo); @@ -769,7 +769,7 @@ namespace DX9 { gameUsesSequentialCopies_ = true; } } - BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, false); + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, false); // TODO: Actually do it. 
#if 0 @@ -789,13 +789,15 @@ namespace DX9 { } } - void FramebufferManagerDX9::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { + void FramebufferManagerDX9::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. fbo_unbind(); return; } + // TODO: StretchRect? + fbo_bind_as_render_target(dst->fbo); dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight); DisableState(); diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index dabdea62d0..5979075cc7 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -93,6 +93,9 @@ protected: virtual void FlushBeforeCopy() override; virtual void DecimateFBOs() override; + // Used by ReadFramebufferToMemory and later framebuffer block copies + virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) override; + virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; @@ -103,8 +106,6 @@ private: void SetNumExtraFBOs(int num); - // Used by ReadFramebufferToMemory - void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false); void PackFramebufferDirectx9_(VirtualFramebuffer *vfb); // Used by DrawPixels diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 68fca12634..2cff2e738c 100644 --- a/GPU/GLES/Framebuffer.cpp +++ 
b/GPU/GLES/Framebuffer.cpp @@ -723,7 +723,7 @@ void FramebufferManager::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h if (vfb->fbo) { ClearBuffer(); if (!g_Config.bDisableSlowFramebufEffects) { - BlitFramebuffer_(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); + BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); } } fbo_destroy(old.fbo); @@ -952,7 +952,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer, b if (renderCopy) { VirtualFramebuffer copyInfo = *framebuffer; copyInfo.fbo = renderCopy; - BlitFramebuffer_(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false); + BlitFramebuffer(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false); RebindFramebuffer(); fbo_bind_color_as_texture(renderCopy, 0); @@ -1248,7 +1248,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s gameUsesSequentialCopies_ = true; } } - BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, true); + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, true); // PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels // PackFramebufferAsync_() - Asynchronous pixel data transfer using glReadPixels with PBOs @@ -1270,7 +1270,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s } // TODO: If dimensions are the same, we can use glCopyImageSubData. 
-void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { +void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. fbo_unbind(); @@ -1870,7 +1870,7 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); // Just do the blit! if (g_Config.bBlockTransferGPU) { - BlitFramebuffer_(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); + BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); SetColorUpdated(dstBuffer); RebindFramebuffer(); } @@ -2016,7 +2016,7 @@ bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); - BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); + BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); RebindFramebuffer(); SetColorUpdated(dstBuffer); return true; @@ -2032,7 +2032,7 @@ bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride // Just do the blit! if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); - BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); + BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); RebindFramebuffer(); SetColorUpdated(dstBuffer); return true; // No need to actually do the memory copy behind, probably. 
diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index bd87084825..f11575a419 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -128,6 +128,9 @@ protected: virtual void FlushBeforeCopy() override; virtual void DecimateFBOs() override; + // Used by ReadFramebufferToMemory and later framebuffer block copies + virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) override; + virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; @@ -142,8 +145,6 @@ private: inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const; - // Used by ReadFramebufferToMemory and later framebuffer block copies - void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false); #ifndef USING_GLES2 void PackFramebufferAsync_(VirtualFramebuffer *vfb); #endif From 2463074530e0d84cac22f7f717a4bab63d0570f6 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 15:40:55 -0700 Subject: [PATCH 09/33] d3d: Converge block transfer logic. 
--- GPU/Common/FramebufferCommon.cpp | 292 +++++++++++++++++++++++++++++++ GPU/Common/FramebufferCommon.h | 10 +- GPU/Directx9/FramebufferDX9.cpp | 15 +- GPU/Directx9/FramebufferDX9.h | 3 - GPU/GLES/Framebuffer.cpp | 292 ------------------------------- GPU/GLES/Framebuffer.h | 11 -- 6 files changed, 302 insertions(+), 321 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 1d9cfc87cc..1a6576bf70 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -423,3 +423,295 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { RebindFramebuffer(); } } + +bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) { + if (updateVRAM_ || size == 0) { + return false; + } + + dst &= 0x3FFFFFFF; + src &= 0x3FFFFFFF; + + VirtualFramebuffer *dstBuffer = 0; + VirtualFramebuffer *srcBuffer = 0; + u32 dstY = (u32)-1; + u32 dstH = 0; + u32 srcY = (u32)-1; + u32 srcH = 0; + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + const u32 vfb_size = FramebufferByteSize(vfb); + const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; + const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; + const int vfb_byteWidth = vfb->width * vfb_bpp; + + if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { + const u32 offset = dst - vfb_address; + const u32 yOffset = offset / vfb_byteStride; + if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) { + dstBuffer = vfb; + dstY = yOffset; + dstH = size == vfb_byteWidth ? 
1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); + } + } + + if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { + const u32 offset = src - vfb_address; + const u32 yOffset = offset / vfb_byteStride; + if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) { + srcBuffer = vfb; + srcY = yOffset; + srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); + } + } + } + + if (srcBuffer && srcY == 0 && srcH == srcBuffer->height && !dstBuffer) { + // MotoGP workaround - it copies a framebuffer to memory and then displays it. + // TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing. + // Unfortunately, that would force 1x render resolution. + if (Memory::IsRAMAddress(dst)) { + knownFramebufferRAMCopies_.insert(std::pair(src, dst)); + } + } + + if (!useBufferedRendering_) { + // If we're copying into a recently used display buf, it's probably destined for the screen. + if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) { + return false; + } + } + + if (dstBuffer && srcBuffer && !isMemset) { + if (srcBuffer == dstBuffer) { + WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); + } else { + WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); + // Just do the blit! 
+ if (g_Config.bBlockTransferGPU) { + BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); + SetColorUpdated(dstBuffer); + RebindFramebuffer(); + } + } + return false; + } else if (dstBuffer) { + WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + const u8 *srcBase = Memory::GetPointerUnchecked(src); + DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); + SetColorUpdated(dstBuffer); + RebindFramebuffer(); + // This is a memcpy, let's still copy just in case. + return false; + } + return false; + } else if (srcBuffer) { + WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); + FlushBeforeCopy(); + if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { + WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); + } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { + ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH); + } + return false; + } else { + return false; + } +} + +void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const { + u32 dstYOffset = -1; + u32 dstXOffset = -1; + u32 srcYOffset = -1; + u32 srcXOffset = -1; + int width = srcWidth; + int height = srcHeight; + + dstBasePtr &= 0x3FFFFFFF; + srcBasePtr &= 0x3FFFFFFF; + + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + const u32 vfb_size = FramebufferByteSize(vfb); + const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 
4 : 2; + const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; + const u32 vfb_byteWidth = vfb->width * vfb_bpp; + + // These heuristics are a bit annoying. + // The goal is to avoid using GPU block transfers for things that ought to be memory. + // Maybe we should even check for textures at these places instead? + + if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) { + const u32 byteOffset = dstBasePtr - vfb_address; + const u32 byteStride = dstStride * bpp; + const u32 yOffset = byteOffset / byteStride; + // Some games use mismatching bitdepths. But make sure the stride matches. + // If it doesn't, generally this means we detected the framebuffer with too large a height. + bool match = yOffset < dstYOffset; + if (match && vfb_byteStride != byteStride) { + // Grand Knights History copies with a mismatching stride but a full line at a time. + // Makes it hard to detect the wrong transfers in e.g. God of War. + if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { + match = false; + } else { + dstWidth = byteStride * height / vfb_bpp; + dstHeight = 1; + } + } else if (match) { + dstWidth = width; + dstHeight = height; + } + if (match) { + dstYOffset = yOffset; + dstXOffset = (byteOffset / bpp) % dstStride; + dstBuffer = vfb; + } + } + if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) { + const u32 byteOffset = srcBasePtr - vfb_address; + const u32 byteStride = srcStride * bpp; + const u32 yOffset = byteOffset / byteStride; + bool match = yOffset < srcYOffset; + if (match && vfb_byteStride != byteStride) { + if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { + match = false; + } else { + srcWidth = byteStride * height / vfb_bpp; + srcHeight = 1; + } + } else if (match) { + srcWidth = width; + srcHeight = height; + } + if (match) { + srcYOffset = yOffset; + srcXOffset = (byteOffset / bpp) % srcStride; + srcBuffer = 
vfb; + } + } + } + + if (dstYOffset != (u32)-1) { + dstY += dstYOffset; + dstX += dstXOffset; + } + if (srcYOffset != (u32)-1) { + srcY += srcYOffset; + srcX += srcXOffset; + } +} + +bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { + if (!useBufferedRendering_ || updateVRAM_) { + return false; + } + + // Skip checking if there's no framebuffers in that area. + if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) { + return false; + } + + VirtualFramebuffer *dstBuffer = 0; + VirtualFramebuffer *srcBuffer = 0; + int srcWidth = width; + int srcHeight = height; + int dstWidth = width; + int dstHeight = height; + FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); + + if (dstBuffer && srcBuffer) { + if (srcBuffer == dstBuffer) { + if (srcX != dstX || srcY != dstY) { + WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); + RebindFramebuffer(); + SetColorUpdated(dstBuffer); + return true; + } + } else { + // Ignore, nothing to do. Tales of Phantasia X does this by accident. + if (g_Config.bBlockTransferGPU) { + return true; + } + } + } else { + WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); + // Just do the blit! + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); + RebindFramebuffer(); + SetColorUpdated(dstBuffer); + return true; // No need to actually do the memory copy behind, probably. 
+ } + } + return false; + } else if (dstBuffer) { + // Here we should just draw the pixels into the buffer. Copy first. + return false; + } else if (srcBuffer) { + WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr); + FlushBeforeCopy(); + if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { + const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2; + const float srcXFactor = (float)bpp / srcBpp; + if (srcHeight <= 0 || srcY + srcHeight > srcBuffer->bufferHeight) { + WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); + } else { + ReadFramebufferToMemory(srcBuffer, true, srcX * srcXFactor, srcY, srcWidth * srcXFactor, srcHeight); + } + } + return false; // Let the bit copy happen + } else { + return false; + } +} + +void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { + // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to + // the backbuffer. Detect this and have the framebuffermanager draw the pixels. + + u32 backBuffer = PrevDisplayFramebufAddr(); + u32 displayBuffer = DisplayFramebufAddr(); + + // TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy? 
+ if (((backBuffer != 0 && dstBasePtr == backBuffer) || + (displayBuffer != 0 && dstBasePtr == displayBuffer)) && + dstStride == 512 && height == 272 && !useBufferedRendering_) { + FlushBeforeCopy(); + DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false); + } + + if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) { + VirtualFramebuffer *dstBuffer = 0; + VirtualFramebuffer *srcBuffer = 0; + int srcWidth = width; + int srcHeight = height; + int dstWidth = width; + int dstHeight = height; + FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); + + if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) { + return; + } + + if (dstBuffer && !srcBuffer) { + WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr); + if (g_Config.bBlockTransferGPU) { + FlushBeforeCopy(); + const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp; + int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 
4 : 2; + float dstXFactor = (float)bpp / dstBpp; + DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, dstWidth * dstXFactor, dstHeight); + SetColorUpdated(dstBuffer); + RebindFramebuffer(); + } + } + } +} diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 623f464947..10be753152 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -17,6 +17,7 @@ #pragma once +#include <set> #include <vector> #include "Common/CommonTypes.h" #include "Core/MemMap.h" @@ -110,9 +111,14 @@ public: } virtual void RebindFramebuffer() = 0; + bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); void UpdateFromMemory(u32 addr, int size, bool safe); - virtual bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false) = 0; virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0; + // Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it. + // In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless + // read framebuffers is on, in which case this should always return false). 
+ bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); + void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0; virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; @@ -185,6 +191,7 @@ protected: virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) = 0; bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const; + void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const; void SetColorUpdated(VirtualFramebuffer *dstBuffer) { dstBuffer->memoryUpdated = false; @@ -214,6 +221,7 @@ protected: bool updateVRAM_; std::vector<VirtualFramebuffer *> vfbs_; + std::set<std::pair<u32, u32>> knownFramebufferRAMCopies_; bool hackForce04154000Download_; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 9a66fadacb..607cdccd78 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -552,7 +552,7 @@ namespace DX9 { // The game is displaying something directly from RAM. In GTA, it's decoded video. 
// First check that it's not a known RAM copy of a VRAM framebuffer though, as in MotoGP - for (auto iter = knownFramebufferCopies_.begin(); iter != knownFramebufferCopies_.end(); ++iter) { + for (auto iter = knownFramebufferRAMCopies_.begin(); iter != knownFramebufferRAMCopies_.end(); ++iter) { if (iter->second == displayFramebufPtr_) { vfb = GetVFBAt(iter->first); } @@ -944,19 +944,6 @@ namespace DX9 { return list; } - // MotoGP workaround - bool FramebufferManagerDX9::NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset) { - for (size_t i = 0; i < vfbs_.size(); i++) { - // This size fits for MotoGP. Might want to make this more flexible for other games if they do the same. - if ((vfbs_[i]->fb_address | 0x04000000) == src && size == 512 * 272 * 2) { - // A framebuffer matched! - knownFramebufferCopies_.insert(std::pair(src, dest)); - } - } - // TODO - return false; - } - bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) { // TODO return false; diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 5979075cc7..28e3c78757 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -74,7 +74,6 @@ public: std::vector GetFramebufferList(); - bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); void DestroyFramebuf(VirtualFramebuffer *vfb); @@ -130,8 +129,6 @@ private: std::vector bvfbs_; // blitting FBOs - std::set> knownFramebufferCopies_; - #if 0 AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_; diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 2cff2e738c..242b8fa9b3 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1804,187 +1804,6 @@ void FramebufferManager::DestroyAllFBOs() { DisableState(); } -bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) { - if (updateVRAM_ || size == 0) { - 
return false; - } - - dst &= 0x3FFFFFFF; - src &= 0x3FFFFFFF; - - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; - u32 dstY = (u32)-1; - u32 dstH = 0; - u32 srcY = (u32)-1; - u32 srcH = 0; - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; - const u32 vfb_size = FramebufferByteSize(vfb); - const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; - const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; - const int vfb_byteWidth = vfb->width * vfb_bpp; - - if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { - const u32 offset = dst - vfb_address; - const u32 yOffset = offset / vfb_byteStride; - if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) { - dstBuffer = vfb; - dstY = yOffset; - dstH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); - } - } - - if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { - const u32 offset = src - vfb_address; - const u32 yOffset = offset / vfb_byteStride; - if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) { - srcBuffer = vfb; - srcY = yOffset; - srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); - } - } - } - - if (srcBuffer && srcY == 0 && srcH == srcBuffer->height && !dstBuffer) { - // MotoGP workaround - it copies a framebuffer to memory and then displays it. - // TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing. - // Unfortunately, that would force 1x render resolution. 
- if (Memory::IsRAMAddress(dst)) { - knownFramebufferRAMCopies_.insert(std::pair(src, dst)); - } - } - - if (!useBufferedRendering_) { - // If we're copying into a recently used display buf, it's probably destined for the screen. - if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) { - return false; - } - } - - if (dstBuffer && srcBuffer && !isMemset) { - if (srcBuffer == dstBuffer) { - WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); - } else { - WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); - // Just do the blit! - if (g_Config.bBlockTransferGPU) { - BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); - SetColorUpdated(dstBuffer); - RebindFramebuffer(); - } - } - return false; - } else if (dstBuffer) { - WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - const u8 *srcBase = Memory::GetPointerUnchecked(src); - DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); - SetColorUpdated(dstBuffer); - RebindFramebuffer(); - // This is a memcpy, let's still copy just in case. 
- return false; - } - return false; - } else if (srcBuffer) { - WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); - FlushBeforeCopy(); - if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { - WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); - } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { - ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH); - } - return false; - } else { - return false; - } -} - -void FramebufferManager::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const { - u32 dstYOffset = -1; - u32 dstXOffset = -1; - u32 srcYOffset = -1; - u32 srcXOffset = -1; - int width = srcWidth; - int height = srcHeight; - - dstBasePtr &= 0x3FFFFFFF; - srcBasePtr &= 0x3FFFFFFF; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebuffer *vfb = vfbs_[i]; - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; - const u32 vfb_size = FramebufferByteSize(vfb); - const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; - const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; - const u32 vfb_byteWidth = vfb->width * vfb_bpp; - - // These heuristics are a bit annoying. - // The goal is to avoid using GPU block transfers for things that ought to be memory. - // Maybe we should even check for textures at these places instead? - - if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) { - const u32 byteOffset = dstBasePtr - vfb_address; - const u32 byteStride = dstStride * bpp; - const u32 yOffset = byteOffset / byteStride; - // Some games use mismatching bitdepths. But make sure the stride matches. 
- // If it doesn't, generally this means we detected the framebuffer with too large a height. - bool match = yOffset < dstYOffset; - if (match && vfb_byteStride != byteStride) { - // Grand Knights History copies with a mismatching stride but a full line at a time. - // Makes it hard to detect the wrong transfers in e.g. God of War. - if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { - match = false; - } else { - dstWidth = byteStride * height / vfb_bpp; - dstHeight = 1; - } - } else if (match) { - dstWidth = width; - dstHeight = height; - } - if (match) { - dstYOffset = yOffset; - dstXOffset = (byteOffset / bpp) % dstStride; - dstBuffer = vfb; - } - } - if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) { - const u32 byteOffset = srcBasePtr - vfb_address; - const u32 byteStride = srcStride * bpp; - const u32 yOffset = byteOffset / byteStride; - bool match = yOffset < srcYOffset; - if (match && vfb_byteStride != byteStride) { - if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { - match = false; - } else { - srcWidth = byteStride * height / vfb_bpp; - srcHeight = 1; - } - } else if (match) { - srcWidth = width; - srcHeight = height; - } - if (match) { - srcYOffset = yOffset; - srcXOffset = (byteOffset / bpp) % srcStride; - srcBuffer = vfb; - } - } - } - - if (dstYOffset != (u32)-1) { - dstY += dstYOffset; - dstX += dstXOffset; - } - if (srcYOffset != (u32)-1) { - srcY += srcYOffset; - srcX += srcXOffset; - } -} - void FramebufferManager::FlushBeforeCopy() { // Flush anything not yet drawn before blitting, downloading, or uploading. // This might be a stalled list, or unflushed before a block transfer, etc. 
@@ -1992,117 +1811,6 @@ void FramebufferManager::FlushBeforeCopy() { transformDraw_->Flush(); } -bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { - if (!useBufferedRendering_ || updateVRAM_) { - return false; - } - - // Skip checking if there's no framebuffers in that area. - if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) { - return false; - } - - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; - int srcWidth = width; - int srcHeight = height; - int dstWidth = width; - int dstHeight = height; - FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); - - if (dstBuffer && srcBuffer) { - if (srcBuffer == dstBuffer) { - if (srcX != dstX || srcY != dstY) { - WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); - RebindFramebuffer(); - SetColorUpdated(dstBuffer); - return true; - } - } else { - // Ignore, nothing to do. Tales of Phantasia X does this by accident. - if (g_Config.bBlockTransferGPU) { - return true; - } - } - } else { - WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); - // Just do the blit! - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); - RebindFramebuffer(); - SetColorUpdated(dstBuffer); - return true; // No need to actually do the memory copy behind, probably. - } - } - return false; - } else if (dstBuffer) { - // Here we should just draw the pixels into the buffer. Copy first. 
- return false; - } else if (srcBuffer) { - WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr); - FlushBeforeCopy(); - if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { - const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2; - const float srcXFactor = (float)bpp / srcBpp; - if (srcHeight <= 0 || srcY + srcHeight > srcBuffer->bufferHeight) { - WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); - } else { - ReadFramebufferToMemory(srcBuffer, true, srcX * srcXFactor, srcY, srcWidth * srcXFactor, srcHeight); - } - } - return false; // Let the bit copy happen - } else { - return false; - } -} - -void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { - // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to - // the backbuffer. Detect this and have the framebuffermanager draw the pixels. - - u32 backBuffer = PrevDisplayFramebufAddr(); - u32 displayBuffer = DisplayFramebufAddr(); - - // TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy? 
- if (((backBuffer != 0 && dstBasePtr == backBuffer) || - (displayBuffer != 0 && dstBasePtr == displayBuffer)) && - dstStride == 512 && height == 272 && !useBufferedRendering_) { - FlushBeforeCopy(); - DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false); - } - - if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) { - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; - int srcWidth = width; - int srcHeight = height; - int dstWidth = width; - int dstHeight = height; - FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); - - if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) { - return; - } - - if (dstBuffer && !srcBuffer) { - WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr); - if (g_Config.bBlockTransferGPU) { - FlushBeforeCopy(); - const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp; - int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2; - float dstXFactor = (float)bpp / dstBpp; - DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, dstWidth * dstXFactor, dstHeight); - SetColorUpdated(dstBuffer); - RebindFramebuffer(); - } - } - } -} - void FramebufferManager::Resized() { resized_ = true; } diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index f11575a419..1e2a22afa0 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -96,18 +96,11 @@ public: // For use when texturing from a framebuffer. May create a duplicate if target. void BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy = false); - // Returns true if it's sure this is a direct FBO->FBO transfer and it has already handle it. 
- // In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless - // read framebuffers is on, in which case this should always return false). - bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); - void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp); - // Reads a rectangular subregion of a framebuffer to the right position in its backing memory. virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; std::vector GetFramebufferList(); - bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); void DestroyFramebuf(VirtualFramebuffer *vfb); @@ -139,8 +132,6 @@ private: void CompileDraw2DProgram(); void DestroyDraw2DProgram(); - void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const; - void SetNumExtraFBOs(int num); inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const; @@ -185,8 +176,6 @@ private: std::vector bvfbs_; // blitting framebuffers (for download) std::map tempFBOs_; - std::set> knownFramebufferRAMCopies_; - #ifndef USING_GLES2 AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_; From 7f89723d2b198ffc2f58f7c17083b14fbde093e4 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 16:37:59 -0700 Subject: [PATCH 10/33] d3d: Initial implementation of framebuf download. Not working, or not working properly... 
--- GPU/Directx9/FramebufferDX9.cpp | 127 ++++++++++++++++++-------------- GPU/Directx9/FramebufferDX9.h | 2 +- GPU/Directx9/helper/fbo.cpp | 4 +- GPU/Directx9/helper/fbo.h | 2 +- GPU/GLES/Framebuffer.cpp | 41 +++++------ 5 files changed, 96 insertions(+), 80 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 607cdccd78..6e85dbc1e7 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -28,6 +28,7 @@ #include "helper/fbo.h" #include "GPU/Common/FramebufferCommon.h" +#include "GPU/Common/TextureDecoder.h" #include "GPU/Directx9/FramebufferDX9.h" #include "GPU/Directx9/ShaderManagerDX9.h" #include "GPU/Directx9/TextureCacheDX9.h" @@ -48,7 +49,15 @@ namespace DX9 { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } - static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format); + inline u16 BGRA8888toRGB565(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); + } + + inline u16 BGRA8888toRGBA4444(u32 px) { + return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); + } + + static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -771,20 +780,7 @@ namespace DX9 { } BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, false); - // TODO: Actually do it. 
-#if 0 -#ifdef USING_GLES2 - PackFramebufferSync_(nvfb); // synchronous glReadPixels -#else - if (gl_extensions.PBO_ARB || !gl_extensions.ATIClampBug) { - if (!sync) { - PackFramebufferAsync_(nvfb); // asynchronous glReadPixels using PBOs - } else { - PackFramebufferSync_(nvfb); // synchronous glReadPixels - } - } -#endif -#endif + PackFramebufferDirectx9_(nvfb, x, y, w, h); RebindFramebuffer(); } } @@ -838,75 +834,96 @@ namespace DX9 { // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) - void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) { - if(format == GE_FORMAT_8888) { - if(src == dst) { + void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { + // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. + const u32 *src32 = (const u32 *)src; + + if (format == GE_FORMAT_8888) { + u32 *dst32 = (u32 *)dst; + if (src == dst) { return; - } else { // Here lets assume they don't intersect - memcpy(dst, src, stride * height * 4); + } else { + for (u32 y = 0; y < height; ++y) { + ConvertBGRA8888ToRGBA8888(dst32, src32, width); + src32 += srcStride; + dst32 += dstStride; + } } - } else { // But here it shouldn't matter if they do - int size = height * stride; - const u32 *src32 = (const u32 *)src; + } else { + // But here it shouldn't matter if they do intersect u16 *dst16 = (u16 *)dst; switch (format) { case GE_FORMAT_565: // BGR 565 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGB565(src32[i]); + for (u32 y = 0; y < height; ++y) { + for (u32 x = 0; x < width; ++x) { + dst16[x] = BGRA8888toRGB565(src32[x]); + } + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_5551: // ABGR 1555 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA5551(src32[i]); + for (u32 y = 0; y < height; ++y) { + ConvertBGRA8888ToRGBA5551(dst16, src32, width); + src32 += 
srcStride; + dst16 += dstStride; } break; case GE_FORMAT_4444: // ABGR 4444 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA4444(src32[i]); + for (u32 y = 0; y < height; ++y) { + for (u32 x = 0; x < width; ++x) { + dst16[x] = BGRA8888toRGBA4444(src32[x]); + } + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_8888: + case GE_FORMAT_INVALID: // Not possible. break; - default: - break; } } } - void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) { - if (vfb->fbo) { - fbo_bind_for_read(vfb->fbo); - } else { - ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0"); + void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { + if (!vfb->fbo) { + ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferDirectx9_: vfb->fbo == 0"); fbo_unbind(); return; } - // Pixel size always 4 here because we always request RGBA8888 - size_t bufSize = vfb->fb_stride * vfb->height * 4; - u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = (0x04000000) | vfb->fb_address; + const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; - u8 *packed = 0; - if(vfb->format == GE_FORMAT_8888) { - packed = (u8 *)Memory::GetPointer(fb_address); - } else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address - packed = (u8 *)malloc(bufSize * sizeof(u8)); - } + // We always need to convert from the framebuffer native format. + // Right now that's always 8888. 
+ DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address); - if(packed) { - DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x", - (u32)bufSize, packed, fb_address); + LPDIRECT3DSURFACE9 renderTarget = fbo_get_for_read(vfb->fbo); + D3DSURFACE_DESC desc; + renderTarget->GetDesc(&desc); - // Resolve(packed, vfb); - - if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert - ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); - free(packed); + LPDIRECT3DSURFACE9 offscreen = nullptr; + // TODO: Cache these? + HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); + if (offscreen && SUCCEEDED(hr)) { + hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); + if (SUCCEEDED(hr)) { + D3DLOCKED_RECT locked; + RECT rect = {0, 0, vfb->renderWidth, vfb->renderHeight}; + hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY); + if (SUCCEEDED(hr)) { + // TODO: Handle the other formats? We don't currently create them, I think. + const int dstByteOffset = y * vfb->fb_stride * dstBpp; + const int srcByteOffset = y * locked.Pitch; + // Pixel size always 4 here because we always request BGRA8888. 
+ ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits + srcByteOffset, vfb->fb_stride, locked.Pitch / 4, vfb->width, h, vfb->format); + offscreen->UnlockRect(); + } } + offscreen->Release(); } - - fbo_unbind(); } void FramebufferManagerDX9::EndFrame() { if (resized_) { diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 28e3c78757..bafd1cade8 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -105,7 +105,7 @@ private: void SetNumExtraFBOs(int num); - void PackFramebufferDirectx9_(VirtualFramebuffer *vfb); + void PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h); // Used by DrawPixels LPDIRECT3DTEXTURE9 drawPixelsTex_; diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index 66990d080b..a6ae292a16 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -97,8 +97,8 @@ LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo) { return fbo->tex; } -void fbo_bind_for_read(FBO *fbo) { - // pD3Ddevice->SetRenderTarget(0, fbo->surf); +LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo) { + return fbo->surf; } void fbo_bind_color_as_texture(FBO *fbo, int color) { diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index 492629dfa9..d0bb79b504 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -28,7 +28,7 @@ FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, F void fbo_bind_as_render_target(FBO *fbo); // color must be 0, for now. 
void fbo_bind_color_as_texture(FBO *fbo, int color); -void fbo_bind_for_read(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo); void fbo_unbind(); void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 242b8fa9b3..7cd8c078f3 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -113,7 +113,7 @@ inline u16 BGRA8888toRGBA4444(u32 px) { return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); } -void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format); +void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -1357,7 +1357,7 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) -void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format) { +void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. 
const u32 *src32 = (const u32 *)src; @@ -1368,20 +1368,19 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig } else if (UseBGRA8888()) { for (u32 y = 0; y < height; ++y) { ConvertBGRA8888ToRGBA8888(dst32, src32, width); - src32 += stride; - dst32 += stride; + src32 += srcStride; + dst32 += dstStride; } } else { // Here let's assume they don't intersect for (u32 y = 0; y < height; ++y) { memcpy(dst32, src32, width * 4); - src32 += stride; - dst32 += stride; + src32 += srcStride; + dst32 += dstStride; } } } else { // But here it shouldn't matter if they do intersect - int size = height * stride; u16 *dst16 = (u16 *)dst; switch (format) { case GE_FORMAT_565: // BGR 565 @@ -1390,16 +1389,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig for (u32 x = 0; x < width; ++x) { dst16[x] = BGRA8888toRGB565(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { for (u32 x = 0; x < width; ++x) { dst16[x] = RGBA8888toRGB565(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1407,14 +1406,14 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig if (UseBGRA8888()) { for (u32 y = 0; y < height; ++y) { ConvertBGRA8888ToRGBA5551(dst16, src32, width); - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { ConvertRGBA8888ToRGBA5551(dst16, src32, width); - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1424,16 +1423,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig for (u32 x = 0; x < width; ++x) { dst16[x] = BGRA8888toRGBA4444(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { for (u32 x = 0; x < width; 
++x) { dst16[x] = RGBA8888toRGBA4444(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1508,7 +1507,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) { if (useCPU || (UseBGRA8888() && pbo.format == GE_FORMAT_8888)) { u8 *dst = Memory::GetPointer(pbo.fb_address); - ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.height, pbo.format); + ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.stride, pbo.height, pbo.format); } else { // We don't need to convert, GPU already did (or should have) Memory::Memcpy(pbo.fb_address, packed, pbo.size); @@ -1672,7 +1671,7 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, in if (convert) { int dstByteOffset = y * vfb->fb_stride * dstBpp; - ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->width, h, vfb->format); + ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->fb_stride, vfb->width, h, vfb->format); } } From 1f44bf23961357b07bdd9edf3d9136db132412d2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 16:39:31 -0700 Subject: [PATCH 11/33] d3d: Don't flip blits when downloading. 
--- GPU/Directx9/FramebufferDX9.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 6e85dbc1e7..f83b4229b5 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -824,7 +824,7 @@ namespace DX9 { float srcW = src->bufferWidth; float srcH = src->bufferHeight; - DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, !flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); + DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); pD3Ddevice->SetTexture(0, NULL); textureCache_->ForgetLastTexture(); dxstate.viewport.restore(); From b9d7ffe48470f73ff6fbc15ebac62df3b53ca55f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 16:47:23 -0700 Subject: [PATCH 12/33] d3d: Support the Danganronpa hack. --- GPU/Common/FramebufferCommon.cpp | 20 ++++++++++++++++++-- GPU/Common/FramebufferCommon.h | 1 + GPU/Directx9/FramebufferDX9.cpp | 4 ---- GPU/Directx9/GPU_DX9.cpp | 1 + GPU/GLES/Framebuffer.cpp | 14 +------------- GPU/GLES/Framebuffer.h | 2 +- 6 files changed, 22 insertions(+), 20 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 1a6576bf70..28bc2413b2 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -20,6 +20,7 @@ #include "Core/Config.h" #include "Core/CoreParameter.h" #include "Core/Reporting.h" +#include "Core/ELF/ParamSFO.h" #include "Core/System.h" #include "GPU/Common/FramebufferCommon.h" #include "GPU/GPUInterface.h" @@ -41,6 +42,21 @@ FramebufferManagerCommon::FramebufferManagerCommon() : FramebufferManagerCommon::~FramebufferManagerCommon() { } +void FramebufferManagerCommon::Init() { + + const std::string gameId = g_paramSFO.GetValueString("DISC_ID"); + // This applies a hack to Dangan Ronpa, 
its demo, and its sequel. + // The game draws solid colors to a small framebuffer, and then reads this directly in VRAM. + // We force this framebuffer to 1x and force download it automatically. + hackForce04154000Download_ = gameId == "NPJH50631" || gameId == "NPJH50372" || gameId == "NPJH90164" || gameId == "NPJH50515"; + + // And an initial clear. We don't clear per frame as the games are supposed to handle that + // by themselves. + ClearBuffer(); + + BeginFrame(); +} + void FramebufferManagerCommon::BeginFrame() { DecimateFBOs(); currentRenderVfb_ = 0; @@ -664,7 +680,7 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst if (srcHeight <= 0 || srcY + srcHeight > srcBuffer->bufferHeight) { WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); } else { - ReadFramebufferToMemory(srcBuffer, true, srcX * srcXFactor, srcY, srcWidth * srcXFactor, srcHeight); + ReadFramebufferToMemory(srcBuffer, true, static_cast(srcX * srcXFactor), srcY, static_cast(srcWidth * srcXFactor), srcHeight); } } return false; // Let the bit copy happen @@ -708,7 +724,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp; int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 
4 : 2; float dstXFactor = (float)bpp / dstBpp; - DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, dstWidth * dstXFactor, dstHeight); + DrawPixels(dstBuffer, static_cast(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast(srcStride * dstXFactor), static_cast(dstWidth * dstXFactor), dstHeight); SetColorUpdated(dstBuffer); RebindFramebuffer(); } diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 10be753152..b71ca1c807 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -94,6 +94,7 @@ public: FramebufferManagerCommon(); virtual ~FramebufferManagerCommon(); + virtual void Init(); void BeginFrame(); void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index f83b4229b5..55372e2ad0 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -126,9 +126,6 @@ namespace DX9 { convBuf(0), gameUsesSequentialCopies_(false) { - // And an initial clear. We don't clear per frame as the games are supposed to handle that - // by themselves. - ClearBuffer(); // TODO: Check / use D3DCAPS2_DYNAMICTEXTURES? 
int usage = 0; D3DPOOL pool = D3DPOOL_MANAGED; @@ -141,7 +138,6 @@ namespace DX9 { drawPixelsTex_ = nullptr; ERROR_LOG(G3D, "Failed to create drawpixels texture"); } - BeginFrame(); } FramebufferManagerDX9::~FramebufferManagerDX9() { diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 26c0176541..ef4cc74711 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -396,6 +396,7 @@ DIRECTX9_GPU::DIRECTX9_GPU() transformDraw_.SetShaderManager(shaderManager_); transformDraw_.SetTextureCache(&textureCache_); transformDraw_.SetFramebufferManager(&framebufferManager_); + framebufferManager_.Init(); framebufferManager_.SetTextureCache(&textureCache_); framebufferManager_.SetShaderManager(shaderManager_); framebufferManager_.SetTransformDrawEngine(&transformDraw_); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 7cd8c078f3..eaceb665a4 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -30,7 +30,6 @@ #include "Core/Config.h" #include "Core/System.h" #include "Core/Reporting.h" -#include "Core/ELF/ParamSFO.h" #include "Core/HLE/sceDisplay.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" @@ -353,20 +352,9 @@ FramebufferManager::FramebufferManager() : } void FramebufferManager::Init() { + FramebufferManagerCommon::Init(); CompileDraw2DProgram(); - - const std::string gameId = g_paramSFO.GetValueString("DISC_ID"); - // This applies a hack to Dangan Ronpa, its demo, and its sequel. - // The game draws solid colors to a small framebuffer, and then reads this directly in VRAM. - // We force this framebuffer to 1x and force download it automatically. - hackForce04154000Download_ = gameId == "NPJH50631" || gameId == "NPJH50372" || gameId == "NPJH90164" || gameId == "NPJH50515"; - - // And an initial clear. We don't clear per frame as the games are supposed to handle that - // by themselves. 
- ClearBuffer(); - SetLineWidth(); - BeginFrame(); } FramebufferManager::~FramebufferManager() { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 1e2a22afa0..ec17e246bc 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -83,7 +83,7 @@ public: void DestroyAllFBOs(); - void Init(); + virtual void Init() override; void EndFrame(); void Resized(); void DeviceLost(); From c17931c1790dcc9a688021e34593cb88a3bbb9c4 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 17:10:57 -0700 Subject: [PATCH 13/33] d3d: Fix flipped y in framebuf blit. --- GPU/Directx9/FramebufferDX9.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 55372e2ad0..7989e98b53 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -804,8 +804,8 @@ namespace DX9 { } int srcX1 = srcX * srcXFactor; int srcX2 = (srcX + w) * srcXFactor; - int srcY2 = src->renderHeight - (h + srcY) * srcYFactor; - int srcY1 = srcY2 + h * srcYFactor; + int srcY1 = srcY * srcYFactor; + int srcY2 = (srcY + h) * srcYFactor; float dstXFactor = 1.0f; float dstYFactor = 1.0f; @@ -815,8 +815,8 @@ namespace DX9 { } int dstX1 = dstX * dstXFactor; int dstX2 = (dstX + w) * dstXFactor; - int dstY2 = dst->renderHeight - (h + dstY) * dstYFactor; - int dstY1 = dstY2 + h * dstYFactor; + int dstY1 = dstY * dstYFactor; + int dstY2 = (dstY + h) * dstYFactor; float srcW = src->bufferWidth; float srcH = src->bufferHeight; @@ -824,8 +824,6 @@ namespace DX9 { pD3Ddevice->SetTexture(0, NULL); textureCache_->ForgetLastTexture(); dxstate.viewport.restore(); - - fbo_unbind(); } // TODO: SSE/NEON From ddc9aaa4e88dec18c3fe46df1df138f148c3c8ae Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 17:14:29 -0700 Subject: [PATCH 14/33] d3d: Download from the specified region only. 
--- GPU/Directx9/FramebufferDX9.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 7989e98b53..ed411d0bc8 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -899,20 +899,21 @@ namespace DX9 { renderTarget->GetDesc(&desc); LPDIRECT3DSURFACE9 offscreen = nullptr; - // TODO: Cache these? + // TODO: Cache these? Also, StretchRect to resample from 1x? HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); if (offscreen && SUCCEEDED(hr)) { hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); if (SUCCEEDED(hr)) { D3DLOCKED_RECT locked; - RECT rect = {0, 0, vfb->renderWidth, vfb->renderHeight}; + u32 widthFactor = vfb->renderWidth / vfb->bufferWidth; + u32 heightFactor = vfb->renderHeight / vfb->bufferHeight; + RECT rect = {x * widthFactor, y * heightFactor, w * widthFactor, h * heightFactor}; hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY); if (SUCCEEDED(hr)) { // TODO: Handle the other formats? We don't currently create them, I think. - const int dstByteOffset = y * vfb->fb_stride * dstBpp; - const int srcByteOffset = y * locked.Pitch; + const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp; // Pixel size always 4 here because we always request BGRA8888. - ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits + srcByteOffset, vfb->fb_stride, locked.Pitch / 4, vfb->width, h, vfb->format); + ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format); offscreen->UnlockRect(); } } From 9d24de3269bc3ec478f7c1df4e54e6c6cf341afd Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 17:28:35 -0700 Subject: [PATCH 15/33] d3d: Oops, switch over block transfer logic. 
--- GPU/Directx9/GPU_DX9.cpp | 59 +++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index ef4cc74711..715c6580a2 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -19,7 +19,9 @@ #include "Common/ChunkFile.h" #include "base/logging.h" +#include "Core/Debugger/Breakpoints.h" #include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" #include "Core/Host.h" #include "Core/Config.h" #include "Core/Reporting.h" @@ -1808,31 +1810,50 @@ void DIRECTX9_GPU::DoBlockTransfer() { return; } - // Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?) - // Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them - // entirely by walking a couple of pointers... - for (int y = 0; y < height; y++) { - const u8 *src = Memory::GetPointerUnchecked(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp); - u8 *dst = Memory::GetPointerUnchecked(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp); - memcpy(dst, src, width * bpp); + // Check that the last address of both source and dest are valid addresses + + u32 srcLastAddr = srcBasePtr + ((height - 1 + srcY) * srcStride + (srcX + width - 1)) * bpp; + u32 dstLastAddr = dstBasePtr + ((height - 1 + dstY) * dstStride + (dstX + width - 1)) * bpp; + + if (!Memory::IsValidAddress(srcLastAddr)) { + ERROR_LOG_REPORT(G3D, "Bottom-right corner of source of block transfer is at an invalid address: %08x", srcLastAddr); + return; + } + if (!Memory::IsValidAddress(dstLastAddr)) { + ERROR_LOG_REPORT(G3D, "Bottom-right corner of destination of block transfer is at an invalid address: %08x", srcLastAddr); + return; } - // TODO: Notify all overlapping FBOs that they need to reload. + // Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return. 
+ if (!framebufferManager_.NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) { + // Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?) + // Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them + // entirely by walking a couple of pointers... + if (srcStride == dstStride && (u32)width == srcStride) { + // Common case in God of War, let's do it all in one chunk. + u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp; + u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp; + const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr); + u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr); + memcpy(dst, src, width * height * bpp); + } else { + for (int y = 0; y < height; y++) { + u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp; + u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp; - textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT); + const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr); + u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr); + memcpy(dst, src, width * bpp); + } + } - // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to - // the backbuffer. Detect this and have the framebuffermanager draw the pixels. 
- - u32 backBuffer = framebufferManager_.PrevDisplayFramebufAddr(); - u32 displayBuffer = framebufferManager_.DisplayFramebufAddr(); - - if (((backBuffer != 0 && dstBasePtr == backBuffer) || - (displayBuffer != 0 && dstBasePtr == displayBuffer)) && - dstStride == 512 && height == 272) { - framebufferManager_.DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), GE_FORMAT_8888, 512, false); + textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT); + framebufferManager_.NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp); } + CBreakPoints::ExecMemCheck(srcBasePtr + (srcY * srcStride + srcX) * bpp, false, height * srcStride * bpp, currentMIPS->pc); + CBreakPoints::ExecMemCheck(dstBasePtr + (srcY * dstStride + srcX) * bpp, true, height * dstStride * bpp, currentMIPS->pc); + // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate. cyclesExecuted += ((height * width * bpp) * 16) / 10; } From d33245207759b5d8fc5a35b369f4eae6d1aa73a7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 17:59:21 -0700 Subject: [PATCH 16/33] d3d: Track memory copies/uploads/downloads. 
--- GPU/Directx9/GPU_DX9.cpp | 95 +++++++++++++++++++++++++++++++++++++--- GPU/Directx9/GPU_DX9.h | 3 ++ 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 715c6580a2..b768183798 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -634,6 +634,18 @@ void DIRECTX9_GPU::ProcessEvent(GPUEvent ev) { InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type); break; + case GPU_EVENT_FB_MEMCPY: + PerformMemoryCopyInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size); + break; + + case GPU_EVENT_FB_MEMSET: + PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size); + break; + + case GPU_EVENT_FB_STENCIL_UPLOAD: + PerformStencilUploadInternal(ev.fb_stencil_upload.dst, ev.fb_stencil_upload.size); + break; + default: GPUCommon::ProcessEvent(ev); } @@ -1881,32 +1893,103 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy } } +void DIRECTX9_GPU::PerformMemoryCopyInternal(u32 dest, u32 src, int size) { + if (!framebufferManager_.NotifyFramebufferCopy(src, dest, size)) { + // We use a little hack for Download/Upload using a VRAM mirror. + // Since they're identical we don't need to copy. + if (!Memory::IsVRAMAddress(dest) || (dest ^ 0x00400000) != src) { + Memory::Memcpy(dest, Memory::GetPointer(src), size); + } + } + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); +} + +void DIRECTX9_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) { + if (!framebufferManager_.NotifyFramebufferCopy(dest, dest, size, true)) { + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + } +} + +void DIRECTX9_GPU::PerformStencilUploadInternal(u32 dest, int size) { + framebufferManager_.NotifyStencilUpload(dest, size); +} + bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { + // Track stray copies of a framebuffer in RAM. MotoGP does this. 
+ if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) { + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_MEMCPY); + ev.fb_memcpy.dst = dest; + ev.fb_memcpy.src = src; + ev.fb_memcpy.size = size; + ScheduleEvent(ev); + + // This is a memcpy, so we need to wait for it to complete. + SyncThread(); + } else { + PerformMemoryCopyInternal(dest, src, size); + } + return true; + } + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; } bool DIRECTX9_GPU::PerformMemorySet(u32 dest, u8 v, int size) { + // This may indicate a memset, usually to 0, of a framebuffer. + if (framebufferManager_.MayIntersectFramebuffer(dest)) { + Memory::Memset(dest, v, size); + + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_MEMSET); + ev.fb_memset.dst = dest; + ev.fb_memset.v = v; + ev.fb_memset.size = size; + ScheduleEvent(ev); + + // We don't need to wait for the framebuffer to be updated. + } else { + PerformMemorySetInternal(dest, v, size); + } + return true; + } + + // Or perhaps a texture, let's invalidate. InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; } bool DIRECTX9_GPU::PerformMemoryDownload(u32 dest, int size) { - InvalidateCache(dest, size, GPU_INVALIDATE_HINT); - - // Track stray copies of a framebuffer in RAM. MotoGP does this. - if (Memory::IsRAMAddress(dest)) { -// framebufferManager_.NotifyFramebufferCopy(src, dest, size); + // Cheat a bit to force a download of the framebuffer. + // VRAM + 0x00400000 is simply a VRAM mirror. + if (Memory::IsVRAMAddress(dest)) { + return PerformMemoryCopy(dest ^ 0x00400000, dest, size); } return false; } bool DIRECTX9_GPU::PerformMemoryUpload(u32 dest, int size) { - InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + // Cheat a bit to force an upload of the framebuffer. + // VRAM + 0x00400000 is simply a VRAM mirror. 
+ if (Memory::IsVRAMAddress(dest)) { + return PerformMemoryCopy(dest, dest ^ 0x00400000, size); + } return false; } bool DIRECTX9_GPU::PerformStencilUpload(u32 dest, int size) { + if (framebufferManager_.MayIntersectFramebuffer(dest)) { + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_STENCIL_UPLOAD); + ev.fb_stencil_upload.dst = dest; + ev.fb_stencil_upload.size = size; + ScheduleEvent(ev); + } else { + PerformStencilUploadInternal(dest, size); + } + return true; + } return false; } diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index f44cd3629c..f53b24ecaa 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -158,6 +158,9 @@ private: void InitClearInternal(); void BeginFrameInternal(); void CopyDisplayToOutputInternal(); + void PerformMemoryCopyInternal(u32 dest, u32 src, int size); + void PerformMemorySetInternal(u32 dest, u8 v, int size); + void PerformStencilUploadInternal(u32 dest, int size); void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type); FramebufferManagerDX9 framebufferManager_; From 2d530a916f1d055ca01086265839d3b7d2f17dfe Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 18:25:45 -0700 Subject: [PATCH 17/33] d3d: Always download 8888, at least for now. --- GPU/Directx9/FramebufferDX9.cpp | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index ed411d0bc8..be843f69ae 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -703,22 +703,7 @@ namespace DX9 { nvfb->usageFlags = FB_USAGE_RENDERTARGET; nvfb->dirtyAfterDisplay = true; - // When updating VRAM, it need to be exact format. 
- switch (vfb->format) { - case GE_FORMAT_4444: - nvfb->colorDepth = FBO_4444; - break; - case GE_FORMAT_5551: - nvfb->colorDepth = FBO_5551; - break; - case GE_FORMAT_565: - nvfb->colorDepth = FBO_565; - break; - case GE_FORMAT_8888: - default: - nvfb->colorDepth = FBO_8888; - break; - } + nvfb->colorDepth = FBO_8888; textureCache_->ForgetLastTexture(); nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth); From ec4e96b87025071897c353884b2c61ce9f93c471 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 18:46:59 -0700 Subject: [PATCH 18/33] d3d: Explicitly bind after fbo_create(). In GLES, this is a tiny call that checks a cache, but it's necessary in Direct3D 9. --- GPU/Directx9/FramebufferDX9.cpp | 2 ++ GPU/GLES/Framebuffer.cpp | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index be843f69ae..0f0f1cafef 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -407,6 +407,7 @@ namespace DX9 { if (old.fbo) { INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); if (vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); ClearBuffer(); if (!g_Config.bDisableSlowFramebufEffects) { BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); @@ -714,6 +715,7 @@ namespace DX9 { nvfb->last_frame_render = gpuStats.numFlips; bvfbs_.push_back(nvfb); + fbo_bind_as_render_target(nvfb->fbo); ClearBuffer(); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index eaceb665a4..004a91ca97 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -211,7 +211,8 @@ void FramebufferManager::SetNumExtraFBOs(int num) { FBO *fbo = fbo_create(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight, 1, false, FBO_8888);
extraFBOs_.push_back(fbo); - // The new FBO is still bound after creation. + // The new FBO is still bound after creation, but let's bind it anyway. + fbo_bind_as_render_target(fbo); ClearBuffer(); } @@ -709,6 +710,7 @@ void FramebufferManager::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h if (old.fbo) { INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); if (vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); ClearBuffer(); if (!g_Config.bDisableSlowFramebufEffects) { BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); @@ -912,6 +914,7 @@ FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { FBO *fbo = fbo_create(w, h, 1, false, depth); if (!fbo) return fbo; + fbo_bind_as_render_target(fbo); ClearBuffer(); const TempFBO info = {fbo, gpuStats.numFlips}; tempFBOs_[key] = info; @@ -1190,6 +1193,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s nvfb->last_frame_render = gpuStats.numFlips; bvfbs_.push_back(nvfb); + fbo_bind_as_render_target(nvfb->fbo); ClearBuffer(); glDisable(GL_DITHER); } else { From 6490a4ef44fe255f949493a49105b3a956263e91 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 19:21:59 -0700 Subject: [PATCH 19/33] d3d: Report errors on download/blit/upload. 
--- GPU/Directx9/FramebufferDX9.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 0f0f1cafef..00d4e3d6b5 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -315,7 +315,10 @@ namespace DX9 { if (tex != NULL) { pD3Ddevice->SetTexture(0, tex); } - pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); + HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); + if (FAILED(hr)) { + ERROR_LOG_REPORT(G3D, "DrawActiveTexture() failed: %08x", hr); + } } void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebuffer *v) { @@ -902,9 +905,15 @@ namespace DX9 { // Pixel size always 4 here because we always request BGRA8888. ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format); offscreen->UnlockRect(); + } else { + ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight); } + } else { + ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address); } offscreen->Release(); + } else { + ERROR_LOG_REPORT(G3D, "Unable to create offscreen surface for %08x %dx%d", fb_address, desc.Width, desc.Height); } } void FramebufferManagerDX9::EndFrame() { From e4d5eb5d6cf45ff134fe187d8ebd3fb73e06eaab Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 19:56:08 -0700 Subject: [PATCH 20/33] d3d: Dynamically size the drawPixelsTex_ like gl. 
--- GPU/Directx9/FramebufferDX9.cpp | 39 +++++++++++++++++++-------------- GPU/Directx9/FramebufferDX9.h | 3 ++- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 00d4e3d6b5..13b17c7eab 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -122,26 +122,12 @@ namespace DX9 { FramebufferManagerDX9::FramebufferManagerDX9() : drawPixelsTex_(0), - drawPixelsTexFormat_(GE_FORMAT_INVALID), convBuf(0), - gameUsesSequentialCopies_(false) - { - // TODO: Check / use D3DCAPS2_DYNAMICTEXTURES? - int usage = 0; - D3DPOOL pool = D3DPOOL_MANAGED; - if (pD3DdeviceEx) { - pool = D3DPOOL_DEFAULT; - usage = D3DUSAGE_DYNAMIC; - } - HRESULT hr = pD3Ddevice->CreateTexture(512, 272, 1, usage, D3DFMT(D3DFMT_A8R8G8B8), pool, &drawPixelsTex_, NULL); - if (FAILED(hr)) { - drawPixelsTex_ = nullptr; - ERROR_LOG(G3D, "Failed to create drawpixels texture"); - } + gameUsesSequentialCopies_(false) { } FramebufferManagerDX9::~FramebufferManagerDX9() { - if(drawPixelsTex_) { + if (drawPixelsTex_) { drawPixelsTex_->Release(); } delete [] convBuf; @@ -166,10 +152,29 @@ namespace DX9 { } void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) { - u8 *convBuf = NULL; D3DLOCKED_RECT rect; + // TODO: Check / use D3DCAPS2_DYNAMICTEXTURES? 
+ if (drawPixelsTex_ && (drawPixelsTexW_ != width || drawPixelsTexH_ != height)) { + drawPixelsTex_->Release(); + drawPixelsTex_ = nullptr; + } + + if (!drawPixelsTex_) { + int usage = 0; + D3DPOOL pool = D3DPOOL_MANAGED; + if (pD3DdeviceEx) { + pool = D3DPOOL_DEFAULT; + usage = D3DUSAGE_DYNAMIC; + } + HRESULT hr = pD3Ddevice->CreateTexture(width, height, 1, usage, D3DFMT(D3DFMT_A8R8G8B8), pool, &drawPixelsTex_, NULL); + if (FAILED(hr)) { + drawPixelsTex_ = nullptr; + ERROR_LOG(G3D, "Failed to create drawpixels texture"); + } + } + if (!drawPixelsTex_) { return; } diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index bafd1cade8..aa650d201d 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -109,7 +109,8 @@ private: // Used by DrawPixels LPDIRECT3DTEXTURE9 drawPixelsTex_; - GEBufferFormat drawPixelsTexFormat_; + int drawPixelsTexW_; + int drawPixelsTexH_; u8 *convBuf; From 98549aeb51df964881e13e75106161d108b5d2be Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 19:57:49 -0700 Subject: [PATCH 21/33] d3d: Fix download of small sections, oops. --- GPU/Directx9/FramebufferDX9.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 13b17c7eab..ead699f63b 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -902,7 +902,7 @@ namespace DX9 { D3DLOCKED_RECT locked; u32 widthFactor = vfb->renderWidth / vfb->bufferWidth; u32 heightFactor = vfb->renderHeight / vfb->bufferHeight; - RECT rect = {x * widthFactor, y * heightFactor, w * widthFactor, h * heightFactor}; + RECT rect = {x * widthFactor, y * heightFactor, (x + w) * widthFactor, (y + h) * heightFactor}; hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY); if (SUCCEEDED(hr)) { // TODO: Handle the other formats? We don't currently create them, I think. 
From 003aaed9a721fe416c4fcf47ef39ad6ca70d0b7c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 20:05:41 -0700 Subject: [PATCH 22/33] d3d: Dirty the shader when drawing textures. --- GPU/Directx9/FramebufferDX9.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index ead699f63b..2fae9a210d 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -317,6 +317,7 @@ namespace DX9 { pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl); pD3Ddevice->SetPixelShader(pFramebufferPixelShader); pD3Ddevice->SetVertexShader(pFramebufferVertexShader); + shaderManager_->DirtyLastShader(); if (tex != NULL) { pD3Ddevice->SetTexture(0, tex); } From 822c5d9e75b3d10ecc262ce36fa6860184ce4ae8 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 20:08:29 -0700 Subject: [PATCH 23/33] d3d: Use StretchRect() when possible. --- GPU/Directx9/FramebufferDX9.cpp | 57 ++++++++++++++++++++++----------- GPU/Directx9/helper/fbo.cpp | 4 +++ GPU/Directx9/helper/fbo.h | 1 + 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 2fae9a210d..9fc06f0e2b 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -784,16 +784,8 @@ namespace DX9 { return; } - // TODO: StretchRect? - - fbo_bind_as_render_target(dst->fbo); - dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight); - DisableState(); - - fbo_bind_color_as_texture(src->fbo, 0); - - float srcXFactor = 1.0f; - float srcYFactor = 1.0f; + float srcXFactor = flip ? 1.0f : (float)src->renderWidth / (float)src->bufferWidth; + float srcYFactor = flip ? 1.0f : (float)src->renderHeight / (float)src->bufferHeight; const int srcBpp = src->format == GE_FORMAT_8888 ? 
4 : 2; if (srcBpp != bpp && bpp != 0) { srcXFactor = (srcXFactor * bpp) / srcBpp; @@ -803,8 +795,8 @@ namespace DX9 { int srcY1 = srcY * srcYFactor; int srcY2 = (srcY + h) * srcYFactor; - float dstXFactor = 1.0f; - float dstYFactor = 1.0f; + float dstXFactor = flip ? 1.0f : (float)dst->renderWidth / (float)dst->bufferWidth; + float dstYFactor = flip ? 1.0f : (float)dst->renderHeight / (float)dst->bufferHeight; const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2; if (dstBpp != bpp && bpp != 0) { dstXFactor = (dstXFactor * bpp) / dstBpp; @@ -814,12 +806,41 @@ namespace DX9 { int dstY1 = dstY * dstYFactor; int dstY2 = (dstY + h) * dstYFactor; - float srcW = src->bufferWidth; - float srcH = src->bufferHeight; - DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); - pD3Ddevice->SetTexture(0, NULL); - textureCache_->ForgetLastTexture(); - dxstate.viewport.restore(); + if (flip) { + fbo_bind_as_render_target(dst->fbo); + dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight); + DisableState(); + + fbo_bind_color_as_texture(src->fbo, 0); + + float srcW = src->bufferWidth; + float srcH = src->bufferHeight; + DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); + pD3Ddevice->SetTexture(0, NULL); + textureCache_->ForgetLastTexture(); + dxstate.viewport.restore(); + + RebindFramebuffer(); + } else { + LPDIRECT3DSURFACE9 srcSurf = fbo_get_for_read(src->fbo); + LPDIRECT3DSURFACE9 dstSurf = fbo_get_for_write(dst->fbo); + RECT srcRect = {srcX1, srcY1, srcX2, srcY2}; + RECT dstRect = {dstX1, dstY1, dstX2, dstY2}; + + D3DSURFACE_DESC desc; + srcSurf->GetDesc(&desc); + srcRect.right = std::min(srcRect.right, (LONG)desc.Width); + srcRect.bottom = std::min(srcRect.bottom, (LONG)desc.Height); + + dstSurf->GetDesc(&desc); + dstRect.right = std::min(dstRect.right, 
(LONG)desc.Width); + dstRect.bottom = std::min(dstRect.bottom, (LONG)desc.Height); + + HRESULT hr = pD3Ddevice->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, D3DTEXF_POINT); + if (FAILED(hr)) { + ERROR_LOG_REPORT(G3D, "StretchRect failed in blit: %08x (%08x -> %08x)", hr, src->fb_address, dst->fb_address); + } + } } // TODO: SSE/NEON diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index a6ae292a16..c5222a890b 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -101,6 +101,10 @@ LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo) { return fbo->surf; } +LPDIRECT3DSURFACE9 fbo_get_for_write(FBO *fbo) { + return fbo->surf; +} + void fbo_bind_color_as_texture(FBO *fbo, int color) { pD3Ddevice->SetTexture(0, fbo->tex); } diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index d0bb79b504..014dc51f67 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -29,6 +29,7 @@ void fbo_bind_as_render_target(FBO *fbo); // color must be 0, for now. void fbo_bind_color_as_texture(FBO *fbo, int color); LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_for_write(FBO *fbo); void fbo_unbind(); void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); From abb2ce325c56c01f8bcaf3f5046ae0446a334ccb Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 20:31:01 -0700 Subject: [PATCH 24/33] d3d: If possible, use StretchRect() to display. May be faster, has less half-pixel offset issues. Still seeing blurriness problems, but they're better. 
--- GPU/Directx9/FramebufferDX9.cpp | 38 +++++++++++++++++++-------------- GPU/Directx9/helper/fbo.cpp | 10 +++++++-- GPU/Directx9/helper/fbo.h | 5 +++-- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 9fc06f0e2b..abf4237495 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -620,18 +620,24 @@ namespace DX9 { const float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; if (1) { - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); - // These are in the output display coordinates - if (g_Config.iBufFilter == SCALE_LINEAR) { - dxstate.texMagFilter.set(D3DTEXF_LINEAR); - dxstate.texMinFilter.set(D3DTEXF_LINEAR); - } else { - dxstate.texMagFilter.set(D3DTEXF_POINT); - dxstate.texMinFilter.set(D3DTEXF_POINT); + RECT srcRect = {offsetX, offsetY, offsetX + 480, offsetY + 272}; + RECT dstRect = {x, y, x + w, y + h}; + HRESULT hr = fbo_blit_color(vfb->fbo, &srcRect, nullptr, &dstRect, g_Config.iBufFilter == SCALE_LINEAR ? 
D3DTEXF_LINEAR : D3DTEXF_POINT); + if (FAILED(hr)) { + ERROR_LOG_REPORT(G3D, "fbo_blit_color failed on display: %08x", hr); + dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + // These are in the output display coordinates + if (g_Config.iBufFilter == SCALE_LINEAR) { + dxstate.texMagFilter.set(D3DTEXF_LINEAR); + dxstate.texMinFilter.set(D3DTEXF_LINEAR); + } else { + dxstate.texMagFilter.set(D3DTEXF_POINT); + dxstate.texMinFilter.set(D3DTEXF_POINT); + } + dxstate.texMipFilter.set(D3DTEXF_NONE); + dxstate.texMipLodBias.set(0); + DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, u0, v0, u1, v1); } - dxstate.texMipFilter.set(D3DTEXF_NONE); - dxstate.texMipLodBias.set(0); - DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, u0, v0, u1, v1); } /* else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) { @@ -822,8 +828,8 @@ namespace DX9 { RebindFramebuffer(); } else { - LPDIRECT3DSURFACE9 srcSurf = fbo_get_for_read(src->fbo); - LPDIRECT3DSURFACE9 dstSurf = fbo_get_for_write(dst->fbo); + LPDIRECT3DSURFACE9 srcSurf = fbo_get_color_for_read(src->fbo); + LPDIRECT3DSURFACE9 dstSurf = fbo_get_color_for_write(dst->fbo); RECT srcRect = {srcX1, srcY1, srcX2, srcY2}; RECT dstRect = {dstX1, dstY1, dstX2, dstY2}; @@ -836,9 +842,9 @@ namespace DX9 { dstRect.right = std::min(dstRect.right, (LONG)desc.Width); dstRect.bottom = std::min(dstRect.bottom, (LONG)desc.Height); - HRESULT hr = pD3Ddevice->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, D3DTEXF_POINT); + HRESULT hr = fbo_blit_color(src->fbo, &srcRect, dst->fbo, &dstRect, D3DTEXF_POINT); if (FAILED(hr)) { - ERROR_LOG_REPORT(G3D, "StretchRect failed in blit: %08x (%08x -> %08x)", hr, src->fb_address, dst->fb_address); + ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", hr, 
src->fb_address, dst->fb_address); } } } @@ -911,7 +917,7 @@ namespace DX9 { // Right now that's always 8888. DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address); - LPDIRECT3DSURFACE9 renderTarget = fbo_get_for_read(vfb->fbo); + LPDIRECT3DSURFACE9 renderTarget = fbo_get_color_for_read(vfb->fbo); D3DSURFACE_DESC desc; renderTarget->GetDesc(&desc); diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index c5222a890b..96de329af1 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -97,11 +97,11 @@ LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo) { return fbo->tex; } -LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo) { +LPDIRECT3DSURFACE9 fbo_get_color_for_read(FBO *fbo) { return fbo->surf; } -LPDIRECT3DSURFACE9 fbo_get_for_write(FBO *fbo) { +LPDIRECT3DSURFACE9 fbo_get_color_for_write(FBO *fbo) { return fbo->surf; } @@ -114,4 +114,10 @@ void fbo_get_dimensions(FBO *fbo, int *w, int *h) { *h = fbo->height; } +HRESULT fbo_blit_color(FBO *src, RECT *srcRect, FBO *dst, RECT *dstRect, D3DTEXTUREFILTERTYPE filter) { + LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf; + LPDIRECT3DSURFACE9 dstSurf = dst ? dst->surf : deviceRTsurf; + return pD3Ddevice->StretchRect(srcSurf, srcRect, dstSurf, dstRect, filter); +} + } diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index 014dc51f67..c25d864b63 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -28,12 +28,13 @@ FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, F void fbo_bind_as_render_target(FBO *fbo); // color must be 0, for now. 
void fbo_bind_color_as_texture(FBO *fbo, int color); -LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo); -LPDIRECT3DSURFACE9 fbo_get_for_write(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_color_for_read(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_color_for_write(FBO *fbo); void fbo_unbind(); void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); void fbo_resolve(FBO *fbo); +HRESULT fbo_blit_color(FBO *src, RECT *srcRect, FBO *dst, RECT *dstRect, D3DTEXTUREFILTERTYPE filter); LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo); From 95bf3a71ddd1e4fe357773598e0aa2146afe8972 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 21:45:18 -0700 Subject: [PATCH 25/33] d3d: Properly cache the DrawPixels tex size. --- GPU/Directx9/FramebufferDX9.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index abf4237495..daa783d1fb 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -173,6 +173,8 @@ namespace DX9 { drawPixelsTex_ = nullptr; ERROR_LOG(G3D, "Failed to create drawpixels texture"); } + drawPixelsTexW_ = width; + drawPixelsTexH_ = height; } if (!drawPixelsTex_) { From 0d1a529b017165c7f0fd6b18054e93c1f182da4b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 21:50:38 -0700 Subject: [PATCH 26/33] d3d: Fix 4444 framebuffer data upload. 
--- GPU/Directx9/FramebufferDX9.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index daa783d1fb..62ddbce986 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -213,11 +213,10 @@ namespace DX9 { } } break; - // not tested case GE_FORMAT_4444: { const u16_le *src = (const u16_le *)srcPixels + srcStride * y; - u32 *dst = (u32 *)(convBuf + rect.Pitch * y); + u8 *dst = (u8 *)(convBuf + rect.Pitch * y); for (int x = 0; x < width; x++) { u16_le col = src[x]; From ff3c5075faec6bd08b8a186dd06088ae998ff6b6 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 22:01:32 -0700 Subject: [PATCH 27/33] d3d: Apply a half pixel offset when rendering. Makes things a lot sharper, although there still seem to be problems. This makes it so the viewport aligns with pixel edges rather than centers at whole integers. --- GPU/Directx9/FramebufferDX9.cpp | 12 ++++++++++++ GPU/Directx9/ShaderManagerDX9.cpp | 21 +++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 62ddbce986..1709438971 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -449,6 +449,10 @@ namespace DX9 { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); } + if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + } } void FramebufferManagerDX9::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) { @@ -497,6 +501,10 @@ namespace DX9 { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); } + if 
(gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + } } void FramebufferManagerDX9::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) { @@ -512,6 +520,10 @@ namespace DX9 { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); } + if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + } } void FramebufferManagerDX9::CopyDisplayToOutput() { diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 66e7d554e5..8253a4f405 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -202,11 +202,13 @@ void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) { } // Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it -void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invert) { +static void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invertedX, bool invertedY, bool invertedZ) { Matrix4x4 s; Matrix4x4 t; - s.setScaling(Vec3(1, 1, invert ? -0.5 : 0.5f)); - t.setTranslation(Vec3(0, 0, 0.5f)); + s.setScaling(Vec3(1, 1, invertedZ ? -0.5 : 0.5f)); + float xoff = 0.5f / gstate_c.curRTRenderWidth; + float yoff = 0.5f / gstate_c.curRTRenderHeight; + t.setTranslation(Vec3(invertedX ? xoff : -xoff, invertedY ? 
-yoff : yoff, 0.5f)); in = in * s * t; } @@ -230,17 +232,20 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); - if (gstate_c.vpHeight < 0) { + + const bool invertedY = gstate_c.vpHeight < 0; + if (invertedY) { flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[13] = -flippedMatrix[13]; } - if (gstate_c.vpWidth < 0) { + const bool invertedX = gstate_c.vpWidth < 0; + if (invertedX) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[12] = -flippedMatrix[12]; } - bool invert = gstate_c.vpDepth < 0; - ConvertProjMatrixToD3D(flippedMatrix, invert); + const bool invertedZ = gstate_c.vpDepth < 0; + ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY, invertedZ); VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); } @@ -248,7 +253,7 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { Matrix4x4 proj_through; proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); - ConvertProjMatrixToD3D(proj_through, false); + ConvertProjMatrixToD3D(proj_through, false, false, false); VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); } From 8c4c9e554e7a2c682c9ccf89dd06e89701df7758 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 22:28:39 -0700 Subject: [PATCH 28/33] d3d: Cache offscreen surfaces. 
--- GPU/Directx9/FramebufferDX9.cpp | 66 +++++++++++++++++++++++++++------ GPU/Directx9/FramebufferDX9.h | 8 ++++ GPU/GLES/Framebuffer.cpp | 2 +- 3 files changed, 63 insertions(+), 13 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 1709438971..77e8f3b709 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -130,6 +130,9 @@ namespace DX9 { if (drawPixelsTex_) { drawPixelsTex_->Release(); } + for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) { + it->second.surface->Release(); + } delete [] convBuf; } @@ -526,6 +529,29 @@ namespace DX9 { } } + LPDIRECT3DSURFACE9 FramebufferManagerDX9::GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface) { + D3DSURFACE_DESC desc; + similarSurface->GetDesc(&desc); + + u64 key = ((u64)desc.Format << 32) | (desc.Width << 16) | desc.Height; + auto it = offscreenSurfaces_.find(key); + if (it != offscreenSurfaces_.end()) { + it->second.last_frame_used = gpuStats.numFlips; + return it->second.surface; + } + + textureCache_->ForgetLastTexture(); + LPDIRECT3DSURFACE9 offscreen = nullptr; + HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); + if (FAILED(hr) || !offscreen) { + ERROR_LOG_REPORT(G3D, "Unable to create offscreen surface %dx%d @%d", desc.Width, desc.Height, desc.Format); + return nullptr; + } + const OffscreenSurface info = {offscreen, gpuStats.numFlips}; + offscreenSurfaces_[key] = info; + return offscreen; + } + void FramebufferManagerDX9::CopyDisplayToOutput() { fbo_unbind(); @@ -934,11 +960,9 @@ namespace DX9 { D3DSURFACE_DESC desc; renderTarget->GetDesc(&desc); - LPDIRECT3DSURFACE9 offscreen = nullptr; - // TODO: Cache these? Also, StretchRect to resample from 1x? 
- HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); - if (offscreen && SUCCEEDED(hr)) { - hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); + LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget); + if (offscreen) { + HRESULT hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); if (SUCCEEDED(hr)) { D3DLOCKED_RECT locked; u32 widthFactor = vfb->renderWidth / vfb->bufferWidth; @@ -957,11 +981,9 @@ namespace DX9 { } else { ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address); } - offscreen->Release(); - } else { - ERROR_LOG_REPORT(G3D, "Unable to create offscreen surface for %08x %dx%d", fb_address, desc.Width, desc.Height); } } + void FramebufferManagerDX9::EndFrame() { if (resized_) { DestroyAllFBOs(); @@ -1027,6 +1049,16 @@ namespace DX9 { } } + for (auto it = offscreenSurfaces_.begin(); it != offscreenSurfaces_.end(); ) { + int age = frameLastFramebufUsed_ - it->second.last_frame_used; + if (age > FBO_OLD_AGE) { + it->second.surface->Release(); + offscreenSurfaces_.erase(it++); + } else { + ++it; + } + } + // Do the same for ReadFramebuffersToMemory's VFBs for (size_t i = 0; i < bvfbs_.size(); ++i) { VirtualFramebuffer *vfb = bvfbs_[i]; @@ -1052,6 +1084,18 @@ namespace DX9 { DestroyFramebuf(vfb); } vfbs_.clear(); + + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebuffer *vfb = bvfbs_[i]; + DestroyFramebuf(vfb); + } + bvfbs_.clear(); + + for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) { + it->second.surface->Release(); + } + offscreenSurfaces_.clear(); + DisableState(); } void FramebufferManagerDX9::FlushBeforeCopy() { @@ -1090,9 +1134,8 @@ namespace DX9 { D3DSURFACE_DESC desc; renderTarget->GetDesc(&desc); - LPDIRECT3DSURFACE9 offscreen = nullptr; - hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, 
NULL); - if (!offscreen || !SUCCEEDED(hr)) { + LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget); + if (!offscreen) { renderTarget->Release(); return false; } @@ -1112,7 +1155,6 @@ namespace DX9 { } } - offscreen->Release(); renderTarget->Release(); return success; diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index aa650d201d..c3c9e29e66 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -85,6 +85,8 @@ public: virtual void RebindFramebuffer() override; + LPDIRECT3DSURFACE9 GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface); + protected: virtual void DisableState() override; virtual void ClearBuffer() override; @@ -128,7 +130,13 @@ private: bool resized_; bool gameUsesSequentialCopies_; + struct OffscreenSurface { + LPDIRECT3DSURFACE9 surface; + int last_frame_used; + }; + std::vector bvfbs_; // blitting FBOs + std::map offscreenSurfaces_; #if 0 AsyncPBO *pixelBufObj_; //this isn't that large diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 004a91ca97..b4d36d9199 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -903,7 +903,7 @@ void FramebufferManager::BlitFramebufferDepth(VirtualFramebuffer *sourceframebuf } FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { - u32 key = ((u64)depth << 32) | (w << 16) | h; + u64 key = ((u64)depth << 32) | (w << 16) | h; auto it = tempFBOs_.find(key); if (it != tempFBOs_.end()) { it->second.last_frame_used = gpuStats.numFlips; From 26978ad7e07ea7529c8ac54a042d5e4fbdc0fece Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 22:39:54 -0700 Subject: [PATCH 29/33] d3d: Allow blit to self in block transfers/etc. 
--- GPU/Directx9/FramebufferDX9.cpp | 49 ++++++++++++++++++++++++++++++++- GPU/Directx9/FramebufferDX9.h | 6 ++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 77e8f3b709..2ef4a9ce94 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -130,6 +130,9 @@ namespace DX9 { if (drawPixelsTex_) { drawPixelsTex_->Release(); } + for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) { + fbo_destroy(it->second.fbo); + } for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) { it->second.surface->Release(); } @@ -529,6 +532,25 @@ namespace DX9 { } } + FBO *FramebufferManagerDX9::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { + u64 key = ((u64)depth << 32) | (w << 16) | h; + auto it = tempFBOs_.find(key); + if (it != tempFBOs_.end()) { + it->second.last_frame_used = gpuStats.numFlips; + return it->second.fbo; + } + + textureCache_->ForgetLastTexture(); + FBO *fbo = fbo_create(w, h, 1, false, depth); + if (!fbo) + return fbo; + fbo_bind_as_render_target(fbo); + ClearBuffer(); + const TempFBO info = {fbo, gpuStats.numFlips}; + tempFBOs_[key] = info; + return fbo; + } + LPDIRECT3DSURFACE9 FramebufferManagerDX9::GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface) { D3DSURFACE_DESC desc; similarSurface->GetDesc(&desc); @@ -881,7 +903,17 @@ namespace DX9 { dstRect.right = std::min(dstRect.right, (LONG)desc.Width); dstRect.bottom = std::min(dstRect.bottom, (LONG)desc.Height); - HRESULT hr = fbo_blit_color(src->fbo, &srcRect, dst->fbo, &dstRect, D3DTEXF_POINT); + // Direct3D 9 doesn't support rect -> self. 
+ FBO *srcFBO = src->fbo; + if (src == dst) { + FBO *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (FBOColorDepth)src->colorDepth); + HRESULT hr = fbo_blit_color(src->fbo, &srcRect, tempFBO, &srcRect, D3DTEXF_POINT); + if (SUCCEEDED(hr)) { + srcFBO = tempFBO; + } + } + + HRESULT hr = fbo_blit_color(srcFBO, &srcRect, dst->fbo, &dstRect, D3DTEXF_POINT); if (FAILED(hr)) { ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", hr, src->fb_address, dst->fb_address); } @@ -1049,6 +1081,16 @@ namespace DX9 { } } + for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) { + int age = frameLastFramebufUsed_ - it->second.last_frame_used; + if (age > FBO_OLD_AGE) { + fbo_destroy(it->second.fbo); + tempFBOs_.erase(it++); + } else { + ++it; + } + } + for (auto it = offscreenSurfaces_.begin(); it != offscreenSurfaces_.end(); ) { int age = frameLastFramebufUsed_ - it->second.last_frame_used; if (age > FBO_OLD_AGE) { @@ -1091,6 +1133,11 @@ namespace DX9 { } bvfbs_.clear(); + for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) { + fbo_destroy(it->second.fbo); + } + tempFBOs_.clear(); + for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) { it->second.surface->Release(); } diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index c3c9e29e66..ada72ef752 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -85,6 +85,7 @@ public: virtual void RebindFramebuffer() override; + FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888); LPDIRECT3DSURFACE9 GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface); protected: @@ -130,12 +131,17 @@ private: bool resized_; bool gameUsesSequentialCopies_; + struct TempFBO { + FBO *fbo; + int last_frame_used; + }; struct OffscreenSurface { LPDIRECT3DSURFACE9 surface; int last_frame_used; }; std::vector bvfbs_; // blitting FBOs + std::map tempFBOs_; std::map offscreenSurfaces_; #if 0 From 
b0cad8ecac727b88dfce05a16f8a09ada6a3f57c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 14 Sep 2014 00:20:59 -0700 Subject: [PATCH 30/33] d3d: StretchRect properly at > 1x. --- GPU/Directx9/FramebufferDX9.cpp | 6 ++++-- GPU/Directx9/helper/fbo.cpp | 2 +- GPU/Directx9/helper/fbo.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 2ef4a9ce94..7572e6b8a4 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -681,8 +681,10 @@ namespace DX9 { const float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; if (1) { - RECT srcRect = {offsetX, offsetY, offsetX + 480, offsetY + 272}; - RECT dstRect = {x, y, x + w, y + h}; + const u32 rw = PSP_CoreParameter().pixelWidth; + const u32 rh = PSP_CoreParameter().pixelHeight; + const RECT srcRect = {(LONG)(u0 * vfb->renderWidth), (LONG)(v0 * vfb->renderHeight), (LONG)(u1 * vfb->renderWidth), (LONG)(v1 * vfb->renderHeight)}; + const RECT dstRect = {x * rw / w, y * rh / h, (x + w) * rw / w, (y + h) * rh / h}; HRESULT hr = fbo_blit_color(vfb->fbo, &srcRect, nullptr, &dstRect, g_Config.iBufFilter == SCALE_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT); if (FAILED(hr)) { ERROR_LOG_REPORT(G3D, "fbo_blit_color failed on display: %08x", hr); diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index 96de329af1..d56c523124 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -114,7 +114,7 @@ void fbo_get_dimensions(FBO *fbo, int *w, int *h) { *h = fbo->height; } -HRESULT fbo_blit_color(FBO *src, RECT *srcRect, FBO *dst, RECT *dstRect, D3DTEXTUREFILTERTYPE filter) { +HRESULT fbo_blit_color(FBO *src, const RECT *srcRect, FBO *dst, const RECT *dstRect, D3DTEXTUREFILTERTYPE filter) { LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf; LPDIRECT3DSURFACE9 dstSurf = dst ? 
dst->surf : deviceRTsurf; return pD3Ddevice->StretchRect(srcSurf, srcRect, dstSurf, dstRect, filter); diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index c25d864b63..0cdddea2ca 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -34,7 +34,7 @@ void fbo_unbind(); void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); void fbo_resolve(FBO *fbo); -HRESULT fbo_blit_color(FBO *src, RECT *srcRect, FBO *dst, RECT *dstRect, D3DTEXTUREFILTERTYPE filter); +HRESULT fbo_blit_color(FBO *src, const RECT *srcRect, FBO *dst, const RECT *dstRect, D3DTEXTUREFILTERTYPE filter); LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo); From d48fb04a18e1db92038ac170802bb92330fa32cb Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 14 Sep 2014 00:34:52 -0700 Subject: [PATCH 31/33] d3d: Declare a couple overrides. --- GPU/Directx9/TextureCacheDX9.h | 2 +- GPU/GLES/TextureCache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index 385832396f..ea7809a10d 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -51,7 +51,7 @@ public: ~TextureCacheDX9(); void SetTexture(bool force = false); - bool SetOffsetTexture(u32 offset); + virtual bool SetOffsetTexture(u32 offset) override; void Clear(bool delete_them); void StartFrame(); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 6028f4c72a..6b9173718c 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -58,7 +58,7 @@ public: ~TextureCache(); void SetTexture(bool force = false); - bool SetOffsetTexture(u32 offset) override; + virtual bool SetOffsetTexture(u32 offset) override; void Clear(bool delete_them); void StartFrame(); From 692cc8dbf1912d02847de642cf415a4d01e6ba94 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 14 Sep 2014 00:49:40 -0700 Subject: [PATCH 32/33] d3d: Support the Breath of Fire 3 hack. 
Since framebuffers are not always flipped. --- GPU/Common/SoftwareTransformCommon.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 9d39dd7ae7..31b020805c 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -395,13 +395,21 @@ void SoftwareTransform( return; } - if (gstate_c.flipTexture && maxIndex >= 2) { + // This means we're using a framebuffer (and one that isn't big enough.) + if (gstate_c.curTextureHeight < (u32)gstate.getTextureHeight(0) && maxIndex >= 2) { // Even if not rectangles, this will detect if either of the first two are outside the framebuffer. // HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines, // while still keeping BOF working (see below). const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel. - const bool tlOutside = transformed[0].v < -invTexH && transformed[0].v > 1.0f - heightFactor; - const bool brOutside = transformed[1].v < -invTexH && transformed[1].v > 1.0f - heightFactor; + bool tlOutside; + bool brOutside; + if (gstate_c.flipTexture) { + tlOutside = transformed[0].v < -invTexH && transformed[0].v > 1.0f - heightFactor; + brOutside = transformed[1].v < -invTexH && transformed[1].v > 1.0f - heightFactor; + } else { + tlOutside = transformed[0].v > invTexH && transformed[0].v > heightFactor - 1.0f; + brOutside = transformed[1].v > invTexH && transformed[1].v > heightFactor - 1.0f; + } if (tlOutside || brOutside) { // Okay, so we're texturing from outside the framebuffer, but inside the texture height. // Breath of Fire 3 does this to access a render surface at an offset. 
@@ -420,9 +428,13 @@ void SoftwareTransform( for (int index = 0; index < maxIndex; ++index) { transformed[index].u *= widthFactor / oldWidthFactor; // Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO. - transformed[index].v = (1.0f - transformed[index].v) / oldHeightFactor; - transformed[index].v -= yDiff; - transformed[index].v = 1.0f - (transformed[index].v * heightFactor); + if (gstate_c.flipTexture) { + transformed[index].v = (1.0f - transformed[index].v) / oldHeightFactor; + transformed[index].v -= yDiff; + transformed[index].v = 1.0f - (transformed[index].v * heightFactor); + } else { + transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor; + } } } } From db4fedcbc0d0621c6992e50c92ca7bfa404862a0 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 14 Sep 2014 02:01:17 -0700 Subject: [PATCH 33/33] d3d: Fix a missing include. --- GPU/Directx9/FramebufferDX9.h | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index ada72ef752..b3ec0bdf54 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -19,6 +19,7 @@ #include <list> #include <set> +#include <map> #include "d3d9.h"