From f2a6c744bc9f30dfd1282bae35204744eb4cb862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 3 Feb 2023 18:51:59 +0100 Subject: [PATCH] Add built-in stretch functionality to depth readback shader path --- GPU/Common/FramebufferManagerCommon.cpp | 39 +++++++++++++++++-------- GPU/Common/FramebufferManagerCommon.h | 2 +- GPU/Directx9/FramebufferManagerDX9.cpp | 7 ++++- GPU/Directx9/FramebufferManagerDX9.h | 2 +- GPU/GLES/DepthBufferGLES.cpp | 18 ++++++++---- GPU/GLES/FramebufferManagerGLES.h | 2 +- 6 files changed, 48 insertions(+), 22 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index de73d52e29..9c7a4de708 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2640,17 +2640,26 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 } bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false; - if (gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) { - buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY); - } else { - buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY); - } - // No need to free on failure, that's the caller's job (it likely will reuse a buffer.) - bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer"); - if (!retval) { - // Try ReadbackDepthbufferSync, in case GLES. + + bool retval; + if (true) { + // Always use ReadbackDepthbufferSync (while we debug it) buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY); - retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w); + retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h); + } else { + // Old code + if (gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) { + buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY); + } else { + buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY); + } + // No need to free on failure, that's the caller's job (it likely will reuse a buffer.) + retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer"); + if (!retval) { + // Try ReadbackDepthbufferSync, in case GLES. + buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY); + retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h); + } } // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe. @@ -2748,7 +2757,7 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, if (channel == RASTER_DEPTH) { _assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid"); - ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride); + ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride, w, h); } else { draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync"); } @@ -2760,8 +2769,14 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, gpuStats.numReadbacks++; } -bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) { +bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { Draw::DataFormat destFormat = GEFormatToThin3D(GE_FORMAT_DEPTH16); + + if (w != destW || h != destH) { + // This path can't handle stretch blits. That's fine, this path is going away later. + return false; + } + // TODO: Apply depth scale factors if we don't have depth clamp. return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, pixels, pixelsStride, "ReadbackDepthbufferSync"); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 4f0212f8cd..2e91e4b2a8 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -453,7 +453,7 @@ public: protected: virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel); // Used for when a shader is required, such as GLES. - virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride); + virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH); virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride); void SetViewport2D(int x, int y, int w, int h); Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); diff --git a/GPU/Directx9/FramebufferManagerDX9.cpp b/GPU/Directx9/FramebufferManagerDX9.cpp index da53a7b802..a3a37eea4c 100644 --- a/GPU/Directx9/FramebufferManagerDX9.cpp +++ b/GPU/Directx9/FramebufferManagerDX9.cpp @@ -53,7 +53,12 @@ FramebufferManagerDX9::FramebufferManagerDX9(Draw::DrawContext *draw) FramebufferManagerDX9::~FramebufferManagerDX9() { } -bool FramebufferManagerDX9::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) { +bool FramebufferManagerDX9::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { + // Don't yet support stretched readbacks here. + if (destW != w || destH != h) { + return false; + } + // We always read the depth buffer in 24_8 format. LPDIRECT3DTEXTURE9 tex = (LPDIRECT3DTEXTURE9)draw_->GetFramebufferAPITexture(fbo, Draw::FB_DEPTH_BIT, 0); if (!tex) diff --git a/GPU/Directx9/FramebufferManagerDX9.h b/GPU/Directx9/FramebufferManagerDX9.h index e189425b38..bcbe3f7941 100644 --- a/GPU/Directx9/FramebufferManagerDX9.h +++ b/GPU/Directx9/FramebufferManagerDX9.h @@ -38,5 +38,5 @@ public: ~FramebufferManagerDX9(); protected: - bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) override; + bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) override; }; diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 96072685ea..39b9fed283 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -173,7 +173,7 @@ static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const cha return pipeline; } -bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) { +bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { using namespace Draw; if (!fbo) { @@ -186,16 +186,22 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int } // Pixel size always 4 here because we always request float or RGBA. - const u32 bufSize = w * h * 4; + const u32 bufSize = destW * destH * 4; if (!convBuf_ || convBufSize_ < bufSize) { delete[] convBuf_; convBuf_ = new u8[bufSize]; convBufSize_ = bufSize; } - const bool useColorPath = gl_extensions.IsGLES; + float scaleX = (float)destW / w; + float scaleY = (float)destH / h; + + bool useColorPath = gl_extensions.IsGLES || scaleX != 1.0f || scaleY != 1.0f; bool format16Bit = false; + // For testing. DO NOT merge. + useColorPath = true; + if (useColorPath) { if (!depthReadbackPipeline_) { depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_vs, "depth_vs"); @@ -205,14 +211,14 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int shaderManager_->DirtyLastShader(); auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height()); draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync"); - Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; + Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f }; draw_->SetViewports(1, &viewport); draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_DEPTH_BIT, 0); draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &depthReadbackSampler_); // We must bind the program after starting the render pass. - draw_->SetScissorRect(0, 0, w, h); + draw_->SetScissorRect(0, 0, destW, destH); draw_->BindPipeline(depthReadbackPipeline_); DepthUB ub{}; @@ -241,7 +247,7 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int }; draw_->DrawUP(positions, 3); - draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackDepthbufferSync"); + draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x * scaleX, y * scaleY, w * scaleX, h * scaleY, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackDepthbufferSync"); textureCache_->ForgetLastTexture(); // TODO: Use 4444 so we can copy lines directly (instead of 32 -> 16 on CPU)? diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h index c910014375..528fc0a79a 100644 --- a/GPU/GLES/FramebufferManagerGLES.h +++ b/GPU/GLES/FramebufferManagerGLES.h @@ -37,7 +37,7 @@ public: protected: void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; - bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) override; + bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) override; bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) override; private: