From 7f89723d2b198ffc2f58f7c17083b14fbde093e4 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 13 Sep 2014 16:37:59 -0700 Subject: [PATCH] d3d: Initial implementation of framebuf download. Not working, or not working properly... --- GPU/Directx9/FramebufferDX9.cpp | 127 ++++++++++++++++++-------------- GPU/Directx9/FramebufferDX9.h | 2 +- GPU/Directx9/helper/fbo.cpp | 4 +- GPU/Directx9/helper/fbo.h | 2 +- GPU/GLES/Framebuffer.cpp | 41 +++++------ 5 files changed, 96 insertions(+), 80 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 607cdccd78..6e85dbc1e7 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -28,6 +28,7 @@ #include "helper/fbo.h" #include "GPU/Common/FramebufferCommon.h" +#include "GPU/Common/TextureDecoder.h" #include "GPU/Directx9/FramebufferDX9.h" #include "GPU/Directx9/ShaderManagerDX9.h" #include "GPU/Directx9/TextureCacheDX9.h" @@ -48,7 +49,15 @@ namespace DX9 { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } - static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format); + inline u16 BGRA8888toRGB565(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); + } + + inline u16 BGRA8888toRGBA4444(u32 px) { + return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); + } + + static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -771,20 +780,7 @@ namespace DX9 { } BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, false); - // TODO: Actually do it. -#if 0 -#ifdef USING_GLES2 - PackFramebufferSync_(nvfb); // synchronous glReadPixels -#else - if (gl_extensions.PBO_ARB || !gl_extensions.ATIClampBug) { - if (!sync) { - PackFramebufferAsync_(nvfb); // asynchronous glReadPixels using PBOs - } else { - PackFramebufferSync_(nvfb); // synchronous glReadPixels - } - } -#endif -#endif + PackFramebufferDirectx9_(nvfb, x, y, w, h); RebindFramebuffer(); } } @@ -838,75 +834,96 @@ namespace DX9 { // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) - void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) { - if(format == GE_FORMAT_8888) { - if(src == dst) { + void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { + // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. + const u32 *src32 = (const u32 *)src; + + if (format == GE_FORMAT_8888) { + u32 *dst32 = (u32 *)dst; + if (src == dst) { return; - } else { // Here lets assume they don't intersect - memcpy(dst, src, stride * height * 4); + } else { + for (u32 y = 0; y < height; ++y) { + ConvertBGRA8888ToRGBA8888(dst32, src32, width); + src32 += srcStride; + dst32 += dstStride; + } } - } else { // But here it shouldn't matter if they do - int size = height * stride; - const u32 *src32 = (const u32 *)src; + } else { + // But here it shouldn't matter if they do intersect u16 *dst16 = (u16 *)dst; switch (format) { case GE_FORMAT_565: // BGR 565 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGB565(src32[i]); + for (u32 y = 0; y < height; ++y) { + for (u32 x = 0; x < width; ++x) { + dst16[x] = BGRA8888toRGB565(src32[x]); + } + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_5551: // ABGR 1555 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA5551(src32[i]); + for (u32 y = 0; y < height; ++y) { + ConvertBGRA8888ToRGBA5551(dst16, src32, width); + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_4444: // ABGR 4444 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA4444(src32[i]); + for (u32 y = 0; y < height; ++y) { + for (u32 x = 0; x < width; ++x) { + dst16[x] = BGRA8888toRGBA4444(src32[x]); + } + src32 += srcStride; + dst16 += dstStride; } break; case GE_FORMAT_8888: + case GE_FORMAT_INVALID: // Not possible. break; - default: - break; } } } - void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) { - if (vfb->fbo) { - fbo_bind_for_read(vfb->fbo); - } else { - ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0"); + void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { + if (!vfb->fbo) { + ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferDirectx9_: vfb->fbo == 0"); fbo_unbind(); return; } - // Pixel size always 4 here because we always request RGBA8888 - size_t bufSize = vfb->fb_stride * vfb->height * 4; - u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = (0x04000000) | vfb->fb_address; + const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; - u8 *packed = 0; - if(vfb->format == GE_FORMAT_8888) { - packed = (u8 *)Memory::GetPointer(fb_address); - } else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address - packed = (u8 *)malloc(bufSize * sizeof(u8)); - } + // We always need to convert from the framebuffer native format. + // Right now that's always 8888. + DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address); - if(packed) { - DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x", - (u32)bufSize, packed, fb_address); + LPDIRECT3DSURFACE9 renderTarget = fbo_get_for_read(vfb->fbo); + D3DSURFACE_DESC desc; + renderTarget->GetDesc(&desc); - // Resolve(packed, vfb); - - if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert - ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); - free(packed); + LPDIRECT3DSURFACE9 offscreen = nullptr; + // TODO: Cache these? + HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); + if (offscreen && SUCCEEDED(hr)) { + hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); + if (SUCCEEDED(hr)) { + D3DLOCKED_RECT locked; + RECT rect = {0, 0, vfb->renderWidth, vfb->renderHeight}; + hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY); + if (SUCCEEDED(hr)) { + // TODO: Handle the other formats? We don't currently create them, I think. + const int dstByteOffset = y * vfb->fb_stride * dstBpp; + const int srcByteOffset = y * locked.Pitch; + // Pixel size always 4 here because we always request BGRA8888. + ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits + srcByteOffset, vfb->fb_stride, locked.Pitch / 4, vfb->width, h, vfb->format); + offscreen->UnlockRect(); + } } + offscreen->Release(); } - - fbo_unbind(); } void FramebufferManagerDX9::EndFrame() { if (resized_) { diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 28e3c78757..bafd1cade8 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -105,7 +105,7 @@ private: void SetNumExtraFBOs(int num); - void PackFramebufferDirectx9_(VirtualFramebuffer *vfb); + void PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h); // Used by DrawPixels LPDIRECT3DTEXTURE9 drawPixelsTex_; diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index 66990d080b..a6ae292a16 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -97,8 +97,8 @@ LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo) { return fbo->tex; } -void fbo_bind_for_read(FBO *fbo) { - // pD3Ddevice->SetRenderTarget(0, fbo->surf); +LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo) { + return fbo->surf; } void fbo_bind_color_as_texture(FBO *fbo, int color) { diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index 492629dfa9..d0bb79b504 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -28,7 +28,7 @@ FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, F void fbo_bind_as_render_target(FBO *fbo); // color must be 0, for now. void fbo_bind_color_as_texture(FBO *fbo, int color); -void fbo_bind_for_read(FBO *fbo); +LPDIRECT3DSURFACE9 fbo_get_for_read(FBO *fbo); void fbo_unbind(); void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 242b8fa9b3..7cd8c078f3 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -113,7 +113,7 @@ inline u16 BGRA8888toRGBA4444(u32 px) { return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); } -void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format); +void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -1357,7 +1357,7 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) -void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format) { +void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) { // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. const u32 *src32 = (const u32 *)src; @@ -1368,20 +1368,19 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig } else if (UseBGRA8888()) { for (u32 y = 0; y < height; ++y) { ConvertBGRA8888ToRGBA8888(dst32, src32, width); - src32 += stride; - dst32 += stride; + src32 += srcStride; + dst32 += dstStride; } } else { // Here let's assume they don't intersect for (u32 y = 0; y < height; ++y) { memcpy(dst32, src32, width * 4); - src32 += stride; - dst32 += stride; + src32 += srcStride; + dst32 += dstStride; } } } else { // But here it shouldn't matter if they do intersect - int size = height * stride; u16 *dst16 = (u16 *)dst; switch (format) { case GE_FORMAT_565: // BGR 565 @@ -1390,16 +1389,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig for (u32 x = 0; x < width; ++x) { dst16[x] = BGRA8888toRGB565(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { for (u32 x = 0; x < width; ++x) { dst16[x] = RGBA8888toRGB565(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1407,14 +1406,14 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig if (UseBGRA8888()) { for (u32 y = 0; y < height; ++y) { ConvertBGRA8888ToRGBA5551(dst16, src32, width); - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { ConvertRGBA8888ToRGBA5551(dst16, src32, width); - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1424,16 +1423,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig for (u32 x = 0; x < width; ++x) { dst16[x] = BGRA8888toRGBA4444(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } else { for (u32 y = 0; y < height; ++y) { for (u32 x = 0; x < width; ++x) { dst16[x] = RGBA8888toRGBA4444(src32[x]); } - src32 += stride; - dst16 += stride; + src32 += srcStride; + dst16 += dstStride; } } break; @@ -1508,7 +1507,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) { if (useCPU || (UseBGRA8888() && pbo.format == GE_FORMAT_8888)) { u8 *dst = Memory::GetPointer(pbo.fb_address); - ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.height, pbo.format); + ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.stride, pbo.height, pbo.format); } else { // We don't need to convert, GPU already did (or should have) Memory::Memcpy(pbo.fb_address, packed, pbo.size); @@ -1672,7 +1671,7 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, in if (convert) { int dstByteOffset = y * vfb->fb_stride * dstBpp; - ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->width, h, vfb->format); + ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->fb_stride, vfb->width, h, vfb->format); } }