Explicitly download rendered CLUTs.

This avoids triggering the logic that tries to get the sizing right or
that optimizes frequent copies.  CLUT framebuffers often have their size
estimated wrong, so it's better to always copy just the correct range.
This commit is contained in:
Unknown W. Brackets 2016-01-04 21:29:03 -08:00
parent 4e088aebb7
commit 28a07c70c6
11 changed files with 111 additions and 21 deletions

View file

@ -168,6 +168,7 @@ public:
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason);
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0;
// Reads back rendered framebuffer data needed for a CLUT load.
// Backend implementations look up the framebuffer at fb_address, read back the
// region covered by loadBytes, and then copy loadBytes bytes into clut.
virtual void DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) = 0;
virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
virtual void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0;

View file

@ -227,32 +227,35 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
// It's possible for a game to (successfully) access outside valid memory.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) {
gpu->PerformMemoryDownload(clutAddr, bytes);
}
#ifdef _M_SSE
int numBlocks = bytes / 16;
if (bytes == loadBytes) {
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
DownloadFramebufferForClut(clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
} else {
#ifdef _M_SSE
int numBlocks = bytes / 16;
if (bytes == loadBytes) {
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#else
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#else
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < clutTotalBytes_) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
}
#endif
}
} else {
memset(clutBufRaw_, 0x00, loadBytes);
}

View file

@ -139,6 +139,8 @@ protected:
virtual bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) = 0;
virtual void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) = 0;
// Backend hook used by LoadClut() to fetch CLUT data that was rendered to a framebuffer.
// clutAddr is the CLUT source address and bytes is the number of bytes to fetch;
// backends forward this to their framebuffer manager, which fills clutBufRaw_.
virtual void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) = 0;
TexCache cache;
std::vector<VirtualFramebuffer *> fbCache_;

View file

@ -859,6 +859,38 @@ namespace DX9 {
}
}
// Reads back the part of a rendered framebuffer that a CLUT load covers, then
// copies the result into the CLUT buffer.
//
// clut:       destination buffer; receives loadBytes bytes when the source address is valid.
// fb_address: address the CLUT is being loaded from, used to look up the framebuffer.
// loadBytes:  number of bytes the CLUT load covers.
void FramebufferManagerDX9::DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) {
	VirtualFramebuffer *vfb = GetVFBAt(fb_address);
	if (vfb && vfb->fb_stride != 0) {
		const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
		int x = 0;
		int y = 0;
		int pixels = loadBytes / bpp;
		// The height will be 1 for each stride or part thereof.
		int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
		int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);

		// The remainder alone is only the right width for a single partial row.
		// When the load is exactly stride-aligned (remainder 0) or spans more than
		// one row, every row before the last must be read in full.
		if (w == 0 || h > 1) {
			w = std::min((int)vfb->fb_stride, (int)vfb->width);
		}

		// We intentionally don't call OptimizeDownloadRange() here - we don't want to over download.
		// CLUT framebuffers are often incorrectly estimated in size.
		if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
			// The whole framebuffer is being read back.
			vfb->memoryUpdated = true;
		}

		// We'll pseudo-blit framebuffers here to get a resized version of vfb.
		VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
		BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0);

		// NOTE(review): presumably this writes the blitted pixels back into emulated memory - confirm.
		PackFramebufferDirectx9_(nvfb, x, y, w, h);

		textureCache_->ForgetLastTexture();
		RebindFramebuffer();
	}

	// Copy from memory even when no framebuffer was found, provided the address is valid.
	// NOTE(review): the 0x04000000 bit presumably maps the address into the VRAM range - confirm.
	if (Memory::IsValidAddress(fb_address | 0x04000000)) {
		Memory::MemcpyUnchecked(clut, fb_address | 0x04000000, loadBytes);
	}
}
bool FramebufferManagerDX9::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
nvfb->colorDepth = FBO_8888;

View file

@ -73,7 +73,8 @@ public:
void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags);
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) override;
std::vector<FramebufferInfo> GetFramebufferList();

View file

@ -804,6 +804,10 @@ void TextureCacheDX9::ApplyTexture() {
nextTexture_ = nullptr;
}
// Fetches rendered CLUT data by delegating to the framebuffer manager, which
// is handed the raw CLUT buffer as the destination.
void TextureCacheDX9::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) {
	void *const clutDest = clutBufRaw_;
	framebufferManager_->DownloadFramebufferForClut(clutDest, clutAddr, bytes);
}
class TextureShaderApplierDX9 {
public:
struct Pos {

View file

@ -72,6 +72,9 @@ public:
void ApplyTexture();
protected:
void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override;
private:
void Decimate(); // Run this once per frame to get rid of old textures.
void DeleteTexture(TexCache::iterator it);

View file

@ -1241,6 +1241,42 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
}
}
// Reads back the part of a rendered framebuffer that a CLUT load covers, then
// copies the result into the CLUT buffer.
//
// clut:       destination buffer; receives loadBytes bytes when the source address is valid.
// fb_address: address the CLUT is being loaded from, used to look up the framebuffer.
// loadBytes:  number of bytes the CLUT load covers.
void FramebufferManager::DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) {
	PROFILE_THIS_SCOPE("gpu-readback");
	// Flush async just in case.
	PackFramebufferAsync_(nullptr);

	VirtualFramebuffer *vfb = GetVFBAt(fb_address);
	if (vfb && vfb->fb_stride != 0) {
		const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
		int x = 0;
		int y = 0;
		int pixels = loadBytes / bpp;
		// The height will be 1 for each stride or part thereof.
		int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
		int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);

		// The remainder alone is only the right width for a single partial row.
		// When the load is exactly stride-aligned (remainder 0) or spans more than
		// one row, every row before the last must be read in full.
		if (w == 0 || h > 1) {
			w = std::min((int)vfb->fb_stride, (int)vfb->width);
		}

		// We intentionally don't call OptimizeDownloadRange() here - we don't want to over download.
		// CLUT framebuffers are often incorrectly estimated in size.
		if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
			// The whole framebuffer is being read back.
			vfb->memoryUpdated = true;
		}

		// We'll pseudo-blit framebuffers here to get a resized version of vfb.
		VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
		BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0);

		// NOTE(review): presumably this synchronously writes the blitted pixels back into emulated memory - confirm.
		PackFramebufferSync_(nvfb, x, y, w, h);

		textureCache_->ForgetLastTexture();
		RebindFramebuffer();
	}

	// Copy from memory even when no framebuffer was found, provided the address is valid.
	// NOTE(review): the 0x04000000 bit presumably maps the address into the VRAM range - confirm.
	if (Memory::IsValidAddress(fb_address | 0x04000000)) {
		Memory::MemcpyUnchecked(clut, fb_address | 0x04000000, loadBytes);
	}
}
bool FramebufferManager::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
// When updating VRAM, it need to be exact format.
if (!gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD)) {

View file

@ -101,7 +101,8 @@ public:
void BindFramebufferColor(int stage, u32 fbRawAddress, VirtualFramebuffer *framebuffer, int flags);
// Reads a rectangular subregion of a framebuffer to the right position in its backing memory.
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) override;
std::vector<FramebufferInfo> GetFramebufferList();

View file

@ -879,6 +879,10 @@ void TextureCache::ApplyTexture() {
nextTexture_ = nullptr;
}
// Fetches rendered CLUT data by delegating to the framebuffer manager, which
// is handed the raw CLUT buffer as the destination.
void TextureCache::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) {
	void *const clutDest = clutBufRaw_;
	framebufferManager_->DownloadFramebufferForClut(clutDest, clutAddr, bytes);
}
class TextureShaderApplier {
public:
struct Pos {

View file

@ -87,6 +87,9 @@ public:
void ApplyTexture();
protected:
void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override;
private:
void Decimate(); // Run this once per frame to get rid of old textures.
void DeleteTexture(TexCache::iterator it);