From d8f4a703967b906279d8365ee7d174995271052d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 11 Nov 2018 10:54:28 +0100 Subject: [PATCH] Remove constraint that virtual framebuffers have to represent VRAM. Prerequisite for #11531, virtual readbacks. --- GPU/Common/FramebufferCommon.cpp | 45 ++++++++++++++------------- GPU/Common/FramebufferCommon.h | 5 ++- GPU/Common/TextureCacheCommon.cpp | 9 +++--- GPU/D3D11/FramebufferManagerD3D11.cpp | 2 +- GPU/Directx9/FramebufferDX9.cpp | 10 +++--- GPU/GLES/DrawEngineGLES.cpp | 2 +- 6 files changed, 37 insertions(+), 36 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index d0ae91c2d0..c654f3f7ce 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -253,10 +253,10 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma if (viewport_width != region_width) { // The majority of the time, these are equal. If not, let's check what we know. - const u32 fb_normalized_address = fb_address | 0x44000000; + const u32 fb_normalized_address = fb_address & 0x3FFFFFFF; u32 nearest_address = 0xFFFFFFFF; for (size_t i = 0; i < vfbs_.size(); ++i) { - const u32 other_address = vfbs_[i]->fb_address | 0x44000000; + const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF; if (other_address > fb_normalized_address && other_address < nearest_address) { nearest_address = other_address; } @@ -282,11 +282,10 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma } void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) { - params->fb_addr = gstate.getFrameBufAddress(); - params->fb_address = gstate.getFrameBufRawAddress(); + params->fb_address = (gstate.getFrameBufRawAddress() & 0x3FFFFFFF) | 0x04000000; // GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM. params->fb_stride = gstate.FrameBufStride(); - params->z_address = gstate.getDepthBufRawAddress(); + params->z_address = (gstate.getDepthBufRawAddress() & 0x3FFFFFFF) | 0x04000000; params->z_stride = gstate.DepthBufStride(); params->fmt = gstate.FrameBufFormat(); @@ -440,9 +439,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame SetColorUpdated(vfb, skipDrawReason); u32 byteSize = FramebufferByteSize(vfb); - u32 fb_address_mem = (params.fb_address & 0x3FFFFFFF) | 0x04000000; - if (Memory::IsVRAMAddress(fb_address_mem) && fb_address_mem + byteSize > framebufRangeEnd_) { - framebufRangeEnd_ = fb_address_mem + byteSize; + // FB heuristics always produce an address in VRAM (this is during rendering) so we don't need to poke in the 0x04000000 flag here. + if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) { + framebufRangeEnd_ = params.fb_address + byteSize; } ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); @@ -456,8 +455,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame currentRenderVfb_ = vfb; if (useBufferedRendering_ && !g_Config.bDisableSlowFramebufEffects) { - gpu->PerformMemoryUpload(fb_address_mem, byteSize); - NotifyStencilUpload(fb_address_mem, byteSize, true); + gpu->PerformMemoryUpload(params.fb_address, byteSize); + NotifyStencilUpload(params.fb_address, byteSize, true); // TODO: Is it worth trying to upload the depth buffer? } @@ -680,7 +679,8 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width, } void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { - addr &= ~0x40000000; + // Take off the uncached flag from the address. Not to be confused with the start of VRAM. + addr &= 0x3FFFFFFF; // TODO: Could go through all FBOs, but probably not important? // TODO: Could also check for inner changes, but video is most important. bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); @@ -700,7 +700,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. fmt = displayFormat_; } - DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); + DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height); SetColorUpdated(vfb, gstate_c.skipDrawReason); } else { INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); @@ -866,11 +866,13 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { VirtualFramebuffer *vfb = GetVFBAt(displayFramebufPtr_); if (!vfb) { - // Let's search for a framebuf within this range. - const u32 addr = (displayFramebufPtr_ & 0x03FFFFFF) | 0x04000000; + // Let's search for a framebuf within this range. Note that we also look for + // "framebuffers" sitting in RAM so we only take off the kernel and uncached bits of the address + // when comparing. + const u32 addr = displayFramebufPtr_ & 0x3FFFFFFF; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *v = vfbs_[i]; - const u32 v_addr = (v->fb_address & 0x03FFFFFF) | 0x04000000; + const u32 v_addr = v->fb_address & 0x3FFFFFFF; const u32 v_size = FramebufferByteSize(v); if (addr >= v_addr && addr < v_addr + v_size) { const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2; @@ -1241,7 +1243,8 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, continue; } - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + // We only remove the kernel and uncached bits when comparing. + const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF; const u32 vfb_size = FramebufferByteSize(vfb); const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; @@ -1352,7 +1355,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF; const u32 vfb_size = FramebufferByteSize(vfb); const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; @@ -1915,7 +1918,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, format); + buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, format); return true; } @@ -1969,7 +1972,7 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(z_address | 0x04000000), z_stride, 512, GPU_DBG_FORMAT_16BIT); + buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT); return true; } @@ -2005,7 +2008,7 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G if (!vfb) { // If there's no vfb and we're drawing there, must be memory? // TODO: Actually get the stencil. - buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, GPU_DBG_FORMAT_8888); + buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888); return true; } @@ -2057,7 +2060,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int return; } - const u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = vfb->fb_address & 0x3FFFFFFF; Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format); const int dstBpp = (int)DataFormatSizeInBytes(destFormat); diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 4f19debd01..9397c94b0a 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -119,7 +119,6 @@ struct VirtualFramebuffer { }; struct FramebufferHeuristicParams { - u32 fb_addr; u32 fb_address; int fb_stride; u32 z_address; @@ -243,10 +242,10 @@ public: size_t NumVFBs() const { return vfbs_.size(); } u32 PrevDisplayFramebufAddr() { - return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0; + return prevDisplayFramebuf_ ? prevDisplayFramebuf_->fb_address : 0; } u32 DisplayFramebufAddr() { - return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0; + return displayFramebuf_ ? displayFramebuf_->fb_address : 0; } u32 DisplayFramebufStride() { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 1409a6b403..9889fda271 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -635,9 +635,9 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c } void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) { - // Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors. + // Mask to ignore the Z memory mirrors. // These checks are mainly to reduce scanning all textures. - const u32 addr = (address | 0x04000000) & 0x3F9FFFFF; + const u32 addr = address & 0x3F9FFFFF; const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2; const u64 cacheKey = (u64)addr << 32; // If it has a clut, those are the low 32 bits, so it'll be inside this range. @@ -750,8 +750,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi AttachedFramebufferInfo fbInfo = { 0 }; const u64 mirrorMask = 0x00600000; - // Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors. - const u32 addr = (address | 0x04000000) & 0x3FFFFFFF & ~mirrorMask; + const u32 addr = (address & 0x3FFFFFFF) & ~mirrorMask; const u32 texaddr = ((entry->addr + texaddrOffset) & ~mirrorMask); const bool noOffset = texaddr == addr; const bool exactMatch = noOffset && entry->format < 4; @@ -990,7 +989,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { clutRenderOffset_ = MAX_CLUT_OFFSET; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - const u32 fb_address = framebuffer->fb_address | 0x04000000; + const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF; const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2; u32 offset = clutFramebufAddr - fb_address; diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp b/GPU/D3D11/FramebufferManagerD3D11.cpp index bb6ee6b660..01bfd06fb6 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -687,7 +687,7 @@ void FramebufferManagerD3D11::PackDepthbuffer(VirtualFramebuffer *vfb, int x, in return; } - const u32 z_address = (0x04000000) | vfb->z_address; + const u32 z_address = vfb->z_address; // TODO } diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 88bddb3bee..75a3beeb3a 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -585,7 +585,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { return; } - const u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = vfb->fb_address & 0x3FFFFFFF; const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; // We always need to convert from the framebuffer native format. @@ -627,7 +627,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } // We always read the depth buffer in 24_8 format. - const u32 z_address = (0x04000000) | vfb->z_address; + const u32 z_address = vfb->z_address; DEBUG_LOG(FRAMEBUF, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address); @@ -732,7 +732,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, fb_format); + buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, fb_format); return true; } LPDIRECT3DSURFACE9 renderTarget = vfb->fbo ? (LPDIRECT3DSURFACE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT | Draw::FB_SURFACE_BIT, 0) : nullptr; @@ -809,7 +809,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(z_address | 0x04000000), z_stride, 512, GPU_DBG_FORMAT_16BIT); + buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT); return true; } @@ -847,7 +847,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(vfb->z_address | 0x04000000), vfb->z_stride, 512, GPU_DBG_FORMAT_16BIT); + buffer = GPUDebugBuffer(Memory::GetPointer(vfb->z_address), vfb->z_stride, 512, GPU_DBG_FORMAT_16BIT); return true; } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index a2bb108860..30f71fdd99 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -311,7 +311,7 @@ void DrawEngineGLES::DoFlush() { gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); textureNeedsApply = true; } else if (gstate.getTextureAddress(0) == ((gstate.getFrameBufRawAddress() | 0x04000000) & 0x3FFFFFFF)) { - // This catches the case of clearing a texture. + // This catches the case of clearing a texture. (#10957) gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); }