mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Explicitly download rendered cluts.
This avoids triggering logic that tries to get the sizing right or optimizes frequent copies. CLUT sizes often get estimated wrong, so it's better to always copy just the correct range.
This commit is contained in:
parent
4e088aebb7
commit
28a07c70c6
11 changed files with 111 additions and 21 deletions
|
@ -168,6 +168,7 @@ public:
|
|||
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason);
|
||||
|
||||
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0;
|
||||
virtual void DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) = 0;
|
||||
virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
|
||||
virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
|
||||
virtual void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0;
|
||||
|
|
|
@ -227,32 +227,35 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
|
|||
// It's possible for a game to (successfully) access outside valid memory.
|
||||
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
|
||||
if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) {
|
||||
gpu->PerformMemoryDownload(clutAddr, bytes);
|
||||
}
|
||||
|
||||
#ifdef _M_SSE
|
||||
int numBlocks = bytes / 16;
|
||||
if (bytes == loadBytes) {
|
||||
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
||||
__m128i *dest = (__m128i *)clutBufRaw_;
|
||||
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
|
||||
__m128i data1 = _mm_loadu_si128(source);
|
||||
__m128i data2 = _mm_loadu_si128(source + 1);
|
||||
_mm_store_si128(dest, data1);
|
||||
_mm_store_si128(dest + 1, data2);
|
||||
DownloadFramebufferForClut(clutAddr, bytes);
|
||||
if (bytes < loadBytes) {
|
||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||
}
|
||||
} else {
|
||||
#ifdef _M_SSE
|
||||
int numBlocks = bytes / 16;
|
||||
if (bytes == loadBytes) {
|
||||
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
||||
__m128i *dest = (__m128i *)clutBufRaw_;
|
||||
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
|
||||
__m128i data1 = _mm_loadu_si128(source);
|
||||
__m128i data2 = _mm_loadu_si128(source + 1);
|
||||
_mm_store_si128(dest, data1);
|
||||
_mm_store_si128(dest + 1, data2);
|
||||
}
|
||||
} else {
|
||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||
if (bytes < loadBytes) {
|
||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||
}
|
||||
}
|
||||
#else
|
||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||
if (bytes < loadBytes) {
|
||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||
}
|
||||
}
|
||||
#else
|
||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||
if (bytes < clutTotalBytes_) {
|
||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
memset(clutBufRaw_, 0x00, loadBytes);
|
||||
}
|
||||
|
|
|
@ -139,6 +139,8 @@ protected:
|
|||
virtual bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) = 0;
|
||||
virtual void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) = 0;
|
||||
|
||||
virtual void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) = 0;
|
||||
|
||||
TexCache cache;
|
||||
std::vector<VirtualFramebuffer *> fbCache_;
|
||||
|
||||
|
|
|
@ -859,6 +859,38 @@ namespace DX9 {
|
|||
}
|
||||
}
|
||||
|
||||
void FramebufferManagerDX9::DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) {
|
||||
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
|
||||
if (vfb && vfb->fb_stride != 0) {
|
||||
const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
|
||||
int x = 0;
|
||||
int y = 0;
|
||||
int pixels = loadBytes / bpp;
|
||||
// The height will be 1 for each stride or part thereof.
|
||||
int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
|
||||
int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
|
||||
|
||||
// We intentionally don't call OptimizeDownloadRange() here - we don't want to over download.
|
||||
// CLUT framebuffers are often incorrectly estimated in size.
|
||||
if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
|
||||
vfb->memoryUpdated = true;
|
||||
}
|
||||
|
||||
// We'll pseudo-blit framebuffers here to get a resized version of vfb.
|
||||
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
|
||||
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0);
|
||||
|
||||
PackFramebufferDirectx9_(nvfb, x, y, w, h);
|
||||
|
||||
textureCache_->ForgetLastTexture();
|
||||
RebindFramebuffer();
|
||||
}
|
||||
|
||||
if (Memory::IsValidAddress(fb_address | 0x04000000)) {
|
||||
Memory::MemcpyUnchecked(clut, fb_address | 0x04000000, loadBytes);
|
||||
}
|
||||
}
|
||||
|
||||
bool FramebufferManagerDX9::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
|
||||
nvfb->colorDepth = FBO_8888;
|
||||
|
||||
|
|
|
@ -73,7 +73,8 @@ public:
|
|||
|
||||
void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags);
|
||||
|
||||
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
|
||||
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
|
||||
void DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) override;
|
||||
|
||||
std::vector<FramebufferInfo> GetFramebufferList();
|
||||
|
||||
|
|
|
@ -804,6 +804,10 @@ void TextureCacheDX9::ApplyTexture() {
|
|||
nextTexture_ = nullptr;
|
||||
}
|
||||
|
||||
// Forwards a CLUT download request to the framebuffer manager, using this
// cache's raw CLUT buffer as the destination.
void TextureCacheDX9::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) {
	auto *clutDest = clutBufRaw_;
	framebufferManager_->DownloadFramebufferForClut(clutDest, clutAddr, bytes);
}
|
||||
|
||||
class TextureShaderApplierDX9 {
|
||||
public:
|
||||
struct Pos {
|
||||
|
|
|
@ -72,6 +72,9 @@ public:
|
|||
|
||||
void ApplyTexture();
|
||||
|
||||
protected:
|
||||
void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override;
|
||||
|
||||
private:
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
|
|
|
@ -1241,6 +1241,42 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
|
|||
}
|
||||
}
|
||||
|
||||
void FramebufferManager::DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) {
|
||||
PROFILE_THIS_SCOPE("gpu-readback");
|
||||
// Flush async just in case.
|
||||
PackFramebufferAsync_(nullptr);
|
||||
|
||||
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
|
||||
if (vfb && vfb->fb_stride != 0) {
|
||||
const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
|
||||
int x = 0;
|
||||
int y = 0;
|
||||
int pixels = loadBytes / bpp;
|
||||
// The height will be 1 for each stride or part thereof.
|
||||
int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
|
||||
int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
|
||||
|
||||
// We intentionally don't call OptimizeDownloadRange() here - we don't want to over download.
|
||||
// CLUT framebuffers are often incorrectly estimated in size.
|
||||
if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
|
||||
vfb->memoryUpdated = true;
|
||||
}
|
||||
|
||||
// We'll pseudo-blit framebuffers here to get a resized version of vfb.
|
||||
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
|
||||
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0);
|
||||
|
||||
PackFramebufferSync_(nvfb, x, y, w, h);
|
||||
|
||||
textureCache_->ForgetLastTexture();
|
||||
RebindFramebuffer();
|
||||
}
|
||||
|
||||
if (Memory::IsValidAddress(fb_address | 0x04000000)) {
|
||||
Memory::MemcpyUnchecked(clut, fb_address | 0x04000000, loadBytes);
|
||||
}
|
||||
}
|
||||
|
||||
bool FramebufferManager::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
|
||||
// When updating VRAM, it need to be exact format.
|
||||
if (!gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD)) {
|
||||
|
|
|
@ -101,7 +101,8 @@ public:
|
|||
void BindFramebufferColor(int stage, u32 fbRawAddress, VirtualFramebuffer *framebuffer, int flags);
|
||||
|
||||
// Reads a rectangular subregion of a framebuffer to the right position in its backing memory.
|
||||
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
|
||||
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
|
||||
void DownloadFramebufferForClut(void *clut, u32 fb_address, u32 loadBytes) override;
|
||||
|
||||
std::vector<FramebufferInfo> GetFramebufferList();
|
||||
|
||||
|
|
|
@ -879,6 +879,10 @@ void TextureCache::ApplyTexture() {
|
|||
nextTexture_ = nullptr;
|
||||
}
|
||||
|
||||
// Forwards a CLUT download request to the framebuffer manager, using this
// cache's raw CLUT buffer as the destination.
void TextureCache::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) {
	auto *clutDest = clutBufRaw_;
	framebufferManager_->DownloadFramebufferForClut(clutDest, clutAddr, bytes);
}
|
||||
|
||||
class TextureShaderApplier {
|
||||
public:
|
||||
struct Pos {
|
||||
|
|
|
@ -87,6 +87,9 @@ public:
|
|||
|
||||
void ApplyTexture();
|
||||
|
||||
protected:
|
||||
void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override;
|
||||
|
||||
private:
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
|
|
Loading…
Add table
Reference in a new issue