From 62972cb8915dc362085778511686a8d20ca28d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 10 Oct 2017 16:51:44 +0200 Subject: [PATCH] D3D11: Move to CopyFramebufferToSync where easily possible. --- GPU/D3D11/FramebufferManagerD3D11.cpp | 133 ++++---------------------- GPU/D3D11/FramebufferManagerD3D11.h | 4 - GPU/GLES/FramebufferManagerGLES.cpp | 21 ++-- Windows/GPU/D3D11Context.cpp | 4 - ext/native/thin3d/thin3d.cpp | 14 +++ ext/native/thin3d/thin3d.h | 9 +- ext/native/thin3d/thin3d_d3d11.cpp | 124 +++++++++++++++++++++++- ext/native/thin3d/thin3d_gl.cpp | 6 +- 8 files changed, 176 insertions(+), 139 deletions(-) diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp b/GPU/D3D11/FramebufferManagerD3D11.cpp index 2768aa8609..9f72382682 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -133,22 +133,9 @@ FramebufferManagerD3D11::FramebufferManagerD3D11(Draw::DrawContext *draw) ShaderTranslationInit(); CompilePostShader(); - - D3D11_TEXTURE2D_DESC packDesc{}; - packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - packDesc.BindFlags = 0; - packDesc.Width = 512; // 512x512 is the maximum size of a framebuffer on the PSP. - packDesc.Height = 512; - packDesc.ArraySize = 1; - packDesc.MipLevels = 1; - packDesc.Usage = D3D11_USAGE_STAGING; - packDesc.SampleDesc.Count = 1; - packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - ASSERT_SUCCESS(device_->CreateTexture2D(&packDesc, nullptr, &packTexture_)); } FramebufferManagerD3D11::~FramebufferManagerD3D11() { - packTexture_->Release(); ShaderTranslationShutdown(); // Drawing cleanup @@ -762,53 +749,18 @@ void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, false); } -// TODO: SSE/NEON -// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) -void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { - // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. - const u32 *src32 = (const u32 *)src; - - if (format == GE_FORMAT_8888) { - u32 *dst32 = (u32 *)dst; - if (src == dst) { - return; - } else { - for (u32 y = 0; y < height; ++y) { - memcpy(dst32, src32, width * 4); - src32 += srcStride; - dst32 += dstStride; - } - } - } else { - // But here it shouldn't matter if they do intersect - u16 *dst16 = (u16 *)dst; - switch (format) { - case GE_FORMAT_565: // BGR 565 - for (u32 y = 0; y < height; ++y) { - ConvertRGBA8888ToRGB565(dst16, src32, width); - src32 += srcStride; - dst16 += dstStride; - } - break; - case GE_FORMAT_5551: // ABGR 1555 - for (u32 y = 0; y < height; ++y) { - ConvertRGBA8888ToRGBA5551(dst16, src32, width); - src32 += srcStride; - dst16 += dstStride; - } - break; - case GE_FORMAT_4444: // ABGR 4444 - for (u32 y = 0; y < height; ++y) { - ConvertRGBA8888ToRGBA4444(dst16, src32, width); - src32 += srcStride; - dst16 += dstStride; - } - break; - case GE_FORMAT_8888: - case GE_FORMAT_INVALID: - // Not possible. - break; - } +static Draw::DataFormat GEFormatToThin3D(int geFormat) { + switch (geFormat) { + case GE_FORMAT_4444: + return Draw::DataFormat::A4R4G4B4_UNORM_PACK16; + case GE_FORMAT_5551: + return Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + case GE_FORMAT_565: + return Draw::DataFormat::R5G6B5_UNORM_PACK16; + case GE_FORMAT_8888: + return Draw::DataFormat::R8G8B8A8_UNORM; + default: + return Draw::DataFormat::UNDEFINED; } } @@ -822,7 +774,9 @@ void FramebufferManagerD3D11::PackFramebufferSync_(VirtualFramebuffer *vfb, int } const u32 fb_address = (0x04000000) | vfb->fb_address; - const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; + + Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format); + const int dstBpp = (int)DataFormatSizeInBytes(destFormat); // TODO: Handle the other formats? We don't currently create them, I think. const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp; @@ -831,26 +785,8 @@ void FramebufferManagerD3D11::PackFramebufferSync_(VirtualFramebuffer *vfb, int // We always need to convert from the framebuffer native format. // Right now that's always 8888. DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x", fb_address); - ID3D11Texture2D *colorTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT, 0); - // Only copy the necessary rectangle. - D3D11_BOX srcBox{ (UINT)x, (UINT)y, 0, (UINT)(x+w), (UINT)(y+h), 1 }; - context_->CopySubresourceRegion(packTexture_, 0, x, y, 0, colorTex, 0, &srcBox); - - // Ideally, we'd round robin between two packTexture_, and simply use the other one. Though if the game - // does a once-off copy, that won't work at all. - - // BIG GPU STALL - D3D11_MAPPED_SUBRESOURCE map; - HRESULT result = context_->Map(packTexture_, 0, D3D11_MAP_READ, 0, &map); - if (FAILED(result)) { - return; - } - - const int srcByteOffset = y * map.RowPitch + x * 4; - // Pixel size always 4 here because we always request BGRA8888. - ConvertFromRGBA8888(destPtr, (u8 *)map.pData + srcByteOffset, vfb->fb_stride, map.RowPitch/4, w, h, vfb->format); - context_->Unmap(packTexture_, 0); + draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride); } // Nobody calls this yet. @@ -953,7 +889,7 @@ bool FramebufferManagerD3D11::GetFramebuffer(u32 fb_address, int fb_stride, GEBu } int w = vfb->renderWidth, h = vfb->renderHeight; - Draw::Framebuffer *fboForRead = nullptr; + Draw::Framebuffer *bound = nullptr; if (vfb->fbo) { if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) { w = vfb->width * maxRes; @@ -968,44 +904,17 @@ bool FramebufferManagerD3D11::GetFramebuffer(u32 fb_address, int fb_stride, GEBu tempVfb.renderHeight = h; BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0); - fboForRead = tempFBO; + bound = tempFBO; } else { - fboForRead = vfb->fbo; + bound = vfb->fbo; } } - if (!fboForRead) + if (!bound) return false; buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true); - ID3D11Texture2D *packTex; - D3D11_TEXTURE2D_DESC packDesc{}; - packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - packDesc.BindFlags = 0; - packDesc.Width = w; - packDesc.Height = h; - packDesc.ArraySize = 1; - packDesc.MipLevels = 1; - packDesc.Usage = D3D11_USAGE_STAGING; - packDesc.SampleDesc.Count = 1; - packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - ASSERT_SUCCESS(device_->CreateTexture2D(&packDesc, nullptr, &packTex)); - - ID3D11Texture2D *nativeTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(fboForRead, Draw::FB_COLOR_BIT, 0); - context_->CopyResource(packTex, nativeTex); - - D3D11_MAPPED_SUBRESOURCE map; - context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map); - - for (int y = 0; y < h; y++) { - uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4; - const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y; - memcpy(dest, src, 4 * w); - } - - context_->Unmap(packTex, 0); - packTex->Release(); - return true; + return draw_->CopyFramebufferToMemorySync(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w); } bool FramebufferManagerD3D11::GetDepthStencilBuffer(VirtualFramebuffer *vfb, GPUDebugBuffer &buffer, bool stencil) { diff --git a/GPU/D3D11/FramebufferManagerD3D11.h b/GPU/D3D11/FramebufferManagerD3D11.h index 7d9b34f1f7..3ef5f5c7bf 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.h +++ b/GPU/D3D11/FramebufferManagerD3D11.h @@ -135,10 +135,6 @@ private: ShaderManagerD3D11 *shaderManagerD3D11_; DrawEngineD3D11 *drawEngine_; - // Permanent 1:1 readback texture, 512x512 fixed - // For larger debug readbacks, we create/destroy textures on the fly. - ID3D11Texture2D *packTexture_; - // Used by post-processing shader // Postprocessing ID3D11VertexShader *postVertexShader_ = nullptr; diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index ed3dd6abec..f623120a15 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -943,8 +943,8 @@ void FramebufferManagerGLES::PackFramebufferAsync_(VirtualFramebuffer *vfb) { pixelBufObj_[currentPBO_].maxSize = bufSize; } - // TODO: Change to CopyFramebufferToBuffer with a proper pointer. - draw_->CopyFramebufferToMemorySync(vfb->fbo, 0, 0, vfb->fb_stride, vfb->height, dataFmt, nullptr); + // TODO: This is a hack since PBOs have not been implemented in Thin3D yet (and maybe shouldn't? maybe should do this internally?) + draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->fb_stride, vfb->height, dataFmt, nullptr, vfb->fb_stride); unbind = true; @@ -1002,7 +1002,7 @@ void FramebufferManagerGLES::PackFramebufferSync_(VirtualFramebuffer *vfb, int x if (packed) { DEBUG_LOG(FRAMEBUF, "Reading framebuffer to mem, bufSize = %u, fb_address = %08x", bufSize, fb_address); int packW = h == 1 ? packWidth : vfb->fb_stride; // TODO: What's this about? - draw_->CopyFramebufferToMemorySync(vfb->fbo, 0, y, packW, h, Draw::DataFormat::R8G8B8A8_UNORM, packed); + draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, y, packW, h, Draw::DataFormat::R8G8B8A8_UNORM, packed, packW); if (convert) { ConvertFromRGBA8888(dst, packed, vfb->fb_stride, vfb->fb_stride, packWidth, h, vfb->format); } @@ -1042,7 +1042,8 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int DEBUG_LOG(FRAMEBUF, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address); - draw_->CopyFramebufferToMemorySync(vfb->fbo, 0, y, h == 1 ? packWidth : vfb->z_stride, h, Draw::DataFormat::D32F, convBuf_); + int packW = h == 1 ? packWidth : vfb->z_stride; + draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, y, packW, h, Draw::DataFormat::D32F, convBuf_, packW); int dstByteOffset = y * vfb->fb_stride * sizeof(u16); u16 *depth = (u16 *)Memory::GetPointer(z_address + dstByteOffset); @@ -1203,12 +1204,10 @@ bool FramebufferManagerGLES::GetFramebuffer(u32 fb_address, int fb_stride, GEBuf } buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true); - draw_->CopyFramebufferToMemorySync(bound, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData()); - + bool retval = draw_->CopyFramebufferToMemorySync(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w); // We may have blitted to a temp FBO. RebindFramebuffer(); - CHECK_GL_ERROR_IF_DEBUG(); - return true; + return retval; } bool FramebufferManagerGLES::GetOutputFramebuffer(GPUDebugBuffer &buffer) { @@ -1217,7 +1216,7 @@ bool FramebufferManagerGLES::GetOutputFramebuffer(GPUDebugBuffer &buffer) { // The backbuffer is flipped (last bool) buffer.Allocate(pw, ph, GPU_DBG_FORMAT_888_RGB, true); - draw_->CopyFramebufferToMemorySync(nullptr, 0, 0, pw, ph, Draw::DataFormat::R8G8B8_UNORM, buffer.GetData()); + draw_->CopyFramebufferToMemorySync(nullptr, Draw::FB_COLOR_BIT, 0, 0, pw, ph, Draw::DataFormat::R8G8B8_UNORM, buffer.GetData(), pw); CHECK_GL_ERROR_IF_DEBUG(); return true; } @@ -1243,7 +1242,7 @@ bool FramebufferManagerGLES::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z } else { buffer.Allocate(vfb->renderWidth, vfb->renderHeight, GPU_DBG_FORMAT_FLOAT, !useBufferedRendering_); } - draw_->CopyFramebufferToMemorySync(vfb->fbo, 0, 0, vfb->renderWidth, vfb->renderHeight, Draw::DataFormat::D32F, buffer.GetData()); + draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, vfb->renderWidth, vfb->renderHeight, Draw::DataFormat::D32F, buffer.GetData(), vfb->renderWidth); CHECK_GL_ERROR_IF_DEBUG(); return true; } @@ -1263,7 +1262,7 @@ bool FramebufferManagerGLES::GetStencilbuffer(u32 fb_address, int fb_stride, GPU #ifndef USING_GLES2 buffer.Allocate(vfb->renderWidth, vfb->renderHeight, GPU_DBG_FORMAT_8BIT, !useBufferedRendering_); - draw_->CopyFramebufferToMemorySync(vfb->fbo, 0, 0, vfb->renderWidth, vfb->renderHeight, Draw::DataFormat::S8, buffer.GetData()); + draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, vfb->renderWidth, vfb->renderHeight, Draw::DataFormat::S8, buffer.GetData(), vfb->renderWidth); CHECK_GL_ERROR_IF_DEBUG(); return true; #else diff --git a/Windows/GPU/D3D11Context.cpp b/Windows/GPU/D3D11Context.cpp index ffd87e29ee..bed1ff2f8d 100644 --- a/Windows/GPU/D3D11Context.cpp +++ b/Windows/GPU/D3D11Context.cpp @@ -27,10 +27,6 @@ D3D11Context::~D3D11Context() { void D3D11Context::SwapBuffers() { swapChain_->Present(0, 0); draw_->HandleEvent(Draw::Event::PRESENTED, 0, 0, nullptr, nullptr); - - // Might be a good idea. - // context_->ClearState(); - // } void D3D11Context::SwapInterval(int interval) { diff --git a/ext/native/thin3d/thin3d.cpp b/ext/native/thin3d/thin3d.cpp index 46ddbf3c53..44dc740664 100644 --- a/ext/native/thin3d/thin3d.cpp +++ b/ext/native/thin3d/thin3d.cpp @@ -41,6 +41,20 @@ size_t DataFormatSizeInBytes(DataFormat fmt) { } } +bool DataFormatIsDepthStencil(DataFormat fmt) { + switch (fmt) { + case DataFormat::D16: + case DataFormat::D24_S8: + case DataFormat::S8: + case DataFormat::D32F: + case DataFormat::D32F_S8: + return true; + default: + return false; + } +} + + bool RefCountedObject::Release() { if (refcount_ > 0 && refcount_ < 10000) { refcount_--; diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index d3431ca2e5..3c8dcc73e0 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -617,10 +617,7 @@ public: virtual void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) = 0; virtual bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) = 0; - virtual bool CopyFramebufferToBuffer(Framebuffer *src, Buffer *buffer, Draw::DataFormat bufferFormat) { - return false; - } - virtual bool CopyFramebufferToMemorySync(Framebuffer *src, int x, int y, int w, int h, Draw::DataFormat format, void *pixels) { + virtual bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride) { return false; } @@ -704,6 +701,10 @@ protected: }; size_t DataFormatSizeInBytes(DataFormat fmt); +bool DataFormatIsDepthStencil(DataFormat fmt); +inline bool DataFormatIsColor(DataFormat fmt) { + return !DataFormatIsDepthStencil(fmt); +} DrawContext *T3DCreateGLContext(); diff --git a/ext/native/thin3d/thin3d_d3d11.cpp b/ext/native/thin3d/thin3d_d3d11.cpp index a57ab83479..d8b83229a8 100644 --- a/ext/native/thin3d/thin3d_d3d11.cpp +++ b/ext/native/thin3d/thin3d_d3d11.cpp @@ -10,6 +10,9 @@ #include "math/dataconv.h" #include "util/text/utf8.h" +#include "Common/ColorConv.h" + +#include #include #include #include @@ -27,7 +30,6 @@ class D3D11DepthStencilState; class D3D11SamplerState; class D3D11RasterState; - class D3D11DrawContext : public DrawContext { public: D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *deviceContext, ID3D11Device1 *device1, ID3D11DeviceContext1 *deviceContext1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd); @@ -56,6 +58,7 @@ public: void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) override; bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; + bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride); // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; @@ -193,6 +196,9 @@ private: uint8_t stencilRef_; bool stencilRefDirty_; + // Temporaries + ID3D11Texture2D *packTexture_ = nullptr; + // System info D3D_FEATURE_LEVEL featureLevel_; std::string adapterDesc_; @@ -237,9 +243,24 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de dxgiDevice->Release(); } CreatePresets(); + + D3D11_TEXTURE2D_DESC packDesc{}; + packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + packDesc.BindFlags = 0; + packDesc.Width = 512; // 512x512 is the maximum size of a framebuffer on the PSP. + packDesc.Height = 512; + packDesc.ArraySize = 1; + packDesc.MipLevels = 1; + packDesc.Usage = D3D11_USAGE_STAGING; + packDesc.SampleDesc.Count = 1; + packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + hr = device_->CreateTexture2D(&packDesc, nullptr, &packTexture_); + assert(SUCCEEDED(hr)); } D3D11DrawContext::~D3D11DrawContext() { + packTexture_->Release(); + // Release references. ID3D11RenderTargetView *view = nullptr; context_->OMSetRenderTargets(1, &view, nullptr); @@ -1285,6 +1306,107 @@ bool D3D11DrawContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, return false; } +// TODO: SSE/NEON +// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) +void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, Draw::DataFormat format) { + // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. + const u32 *src32 = (const u32 *)src; + + if (format == Draw::DataFormat::R8G8B8A8_UNORM) { + u32 *dst32 = (u32 *)dst; + if (src == dst) { + return; + } else { + for (u32 y = 0; y < height; ++y) { + memcpy(dst32, src32, width * 4); + src32 += srcStride; + dst32 += dstStride; + } + } + } else { + // But here it shouldn't matter if they do intersect + u16 *dst16 = (u16 *)dst; + switch (format) { + case Draw::DataFormat::R5G6B5_UNORM_PACK16: // BGR 565 + for (u32 y = 0; y < height; ++y) { + ConvertRGBA8888ToRGB565(dst16, src32, width); + src32 += srcStride; + dst16 += dstStride; + } + break; + case Draw::DataFormat::A1R5G5B5_UNORM_PACK16: // ABGR 1555 + for (u32 y = 0; y < height; ++y) { + ConvertRGBA8888ToRGBA5551(dst16, src32, width); + src32 += srcStride; + dst16 += dstStride; + } + break; + case Draw::DataFormat::A4R4G4B4_UNORM_PACK16: // ABGR 4444 + for (u32 y = 0; y < height; ++y) { + ConvertRGBA8888ToRGBA4444(dst16, src32, width); + src32 += srcStride; + dst16 += dstStride; + } + break; + case Draw::DataFormat::R8G8B8A8_UNORM: + case Draw::DataFormat::UNDEFINED: + // Not possible. + break; + } + } +} + +bool D3D11DrawContext::CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride) { + D3D11Framebuffer *fb = (D3D11Framebuffer *)src; + + assert(fb->colorFormat == DXGI_FORMAT_R8G8B8A8_UNORM); + + bool useGlobalPacktex = (x + w <= 512 && y + h <= 512); + + ID3D11Texture2D *packTex; + if (!useGlobalPacktex) { + D3D11_TEXTURE2D_DESC packDesc{}; + packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + packDesc.BindFlags = 0; + packDesc.Width = w; + packDesc.Height = h; + packDesc.ArraySize = 1; + packDesc.MipLevels = 1; + packDesc.Usage = D3D11_USAGE_STAGING; + packDesc.SampleDesc.Count = 1; + packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + device_->CreateTexture2D(&packDesc, nullptr, &packTex); + + context_->CopyResource(packTex, fb->colorTex); + } else { + packTex = packTexture_; + } + + // Only copy the necessary rectangle. + D3D11_BOX srcBox{ (UINT)x, (UINT)y, 0, (UINT)(x + w), (UINT)(y + h), 1 }; + context_->CopySubresourceRegion(packTex, 0, x, y, 0, fb->colorTex, 0, &srcBox); + + // Ideally, we'd round robin between two packTexture_, and simply use the other one. Though if the game + // does a once-off copy, that won't work at all. + + // BIG GPU STALL + D3D11_MAPPED_SUBRESOURCE map; + HRESULT result = context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map); + if (FAILED(result)) { + return false; + } + + const int srcByteOffset = y * map.RowPitch + x * 4; + // Pixel size always 4 here because we always request BGRA8888. + ConvertFromRGBA8888((u8 *)pixels, (u8 *)map.pData + srcByteOffset, pixelStride, map.RowPitch / 4, w, h, format); + context_->Unmap(packTex, 0); + + if (!useGlobalPacktex) { + packTex->Release(); + } + return true; +} + void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { // TODO: deviceContext1 can actually discard. Useful on Windows Mobile. if (fbo) { diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index d1b6b804af..c48b027085 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -466,7 +466,7 @@ public: void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) override; bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; - bool CopyFramebufferToMemorySync(Framebuffer *src, int x, int y, int w, int h, Draw::DataFormat format, void *pixels) override; + bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride) override; // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; @@ -870,7 +870,7 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int CHECK_GL_ERROR_IF_DEBUG(); } -bool OpenGLContext::CopyFramebufferToMemorySync(Framebuffer *src, int x, int y, int w, int h, Draw::DataFormat dataFormat, void *pixels) { +bool OpenGLContext::CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat dataFormat, void *pixels, int pixelStride) { OpenGLFramebuffer *fb = (OpenGLFramebuffer *)src; fbo_bind_fb_target(true, fb ? fb->handle : 0); @@ -891,7 +891,7 @@ bool OpenGLContext::CopyFramebufferToMemorySync(Framebuffer *src, int x, int y, glPixelStorei(GL_PACK_ALIGNMENT, alignment); if (!gl_extensions.IsGLES || (gl_extensions.GLES3 && gl_extensions.gpuVendor != GPU_VENDOR_NVIDIA)) { // Some drivers seem to require we specify this. See #8254. - glPixelStorei(GL_PACK_ROW_LENGTH, w); + glPixelStorei(GL_PACK_ROW_LENGTH, pixelStride); } glReadPixels(x, y, w, h, format, type, pixels);