Merge pull request #8240 from unknownbrackets/clut-render

Initial CPU-side render-to-CLUT handling
This commit is contained in:
Henrik Rydgård 2015-11-27 00:08:08 +01:00
commit 5827b583d3
6 changed files with 78 additions and 3 deletions

View file

@ -420,6 +420,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
vfb->last_frame_used = 0;
vfb->last_frame_attached = 0;
vfb->last_frame_displayed = 0;
vfb->last_frame_clut = 0;
frameLastFramebufUsed_ = gpuStats.numFlips;
vfbs_.push_back(vfb);
currentRenderVfb_ = vfb;
@ -571,6 +572,13 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
srcBuffer = vfb;
srcY = yOffset;
srcH = 1;
} else if (yOffset == 0 && yOffset < srcY) {
// Okay, last try - it might be a clut.
if (vfb->usageFlags & FB_USAGE_CLUT) {
srcBuffer = vfb;
srcY = yOffset;
srcH = 1;
}
}
}
}
@ -664,7 +672,13 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
// Grand Knights History copies with a mismatching stride but a full line at a time.
// Makes it hard to detect the wrong transfers in e.g. God of War.
if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
match = false;
// However, some other games write cluts to framebuffers.
// Let's catch this and upload. Otherwise reject the match.
match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
if (match) {
dstWidth = byteStride * height / vfb_bpp;
dstHeight = 1;
}
} else {
dstWidth = byteStride * height / vfb_bpp;
dstHeight = 1;
@ -866,6 +880,7 @@ void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) {
checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used);
checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render);
checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut);
}
void FramebufferManagerCommon::ShowScreenResolution() {

View file

@ -29,6 +29,7 @@ enum {
FB_USAGE_DISPLAYED_FRAMEBUFFER = 1,
FB_USAGE_RENDERTARGET = 2,
FB_USAGE_TEXTURE = 4,
FB_USAGE_CLUT = 8,
};
enum {
@ -55,6 +56,7 @@ struct VirtualFramebuffer {
int last_frame_attached;
int last_frame_render;
int last_frame_displayed;
int last_frame_clut;
bool memoryUpdated;
bool depthUpdated;

View file

@ -61,7 +61,7 @@ namespace DX9 {
#define TEXCACHE_MIN_PRESSURE 16 * 1024 * 1024 // Total in VRAM
#define TEXCACHE_SECOND_MIN_PRESSURE 4 * 1024 * 1024
TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), texelsScaledThisFrame_(0) {
TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), clutRenderAddress_(0), texelsScaledThisFrame_(0) {
timesInvalidatedAllThisFrame_ = 0;
lastBoundTexture = INVALID_TEX;
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
@ -779,10 +779,30 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat
}
void TextureCacheDX9::LoadClut(u32 clutAddr, u32 loadBytes) {
// Clear the uncached bit, etc. to match framebuffers.
clutAddr = clutAddr & 0x3FFFFFFF;
bool foundFramebuffer = false;
clutRenderAddress_ = 0;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
if ((framebuffer->fb_address | 0x04000000) == clutAddr) {
framebuffer->last_frame_clut = gpuStats.numFlips;
framebuffer->usageFlags |= FB_USAGE_CLUT;
foundFramebuffer = true;
WARN_LOG_REPORT_ONCE(clutrenderdx9, G3D, "Using rendered CLUT for texture decode at %08x (%dx%dx%d)", clutAddr, framebuffer->width, framebuffer->height, framebuffer->colorDepth);
clutRenderAddress_ = framebuffer->fb_address;
}
}
clutTotalBytes_ = loadBytes;
if (Memory::IsValidAddress(clutAddr)) {
// It's possible for a game to (successfully) access outside valid memory.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
if (foundFramebuffer && !g_Config.bDisableSlowFramebufEffects) {
gpu->PerformMemoryDownload(clutAddr, bytes);
}
#ifdef _M_SSE
int numBlocks = bytes / 16;
if (bytes == loadBytes) {
@ -1167,6 +1187,10 @@ void TextureCacheDX9::SetTexture(bool force) {
// Check for FBO - slow!
if (entry->framebuffer) {
if (match) {
if (hasClut && clutRenderAddress_ != 0) {
WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
}
SetTextureFramebuffer(entry, entry->framebuffer);
entry->lastFrame = gpuStats.numFlips;
return;
@ -1331,6 +1355,10 @@ void TextureCacheDX9::SetTexture(bool force) {
TexCacheEntry entryNew = {0};
cache[cachekey] = entryNew;
if (hasClut && clutRenderAddress_ != 0) {
WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
}
entry = &cache[cachekey];
if (g_Config.bTextureBackoffCache) {
entry->status = TexCacheEntry::STATUS_HASHING;

View file

@ -150,6 +150,7 @@ private:
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
bool clutAlphaLinear_;
u16 clutAlphaLinearColor_;
u32 clutRenderAddress_;
LPDIRECT3DTEXTURE9 lastBoundTexture;
float maxAnisotropyLevel;

View file

@ -68,7 +68,7 @@
// Hack!
extern int g_iNumVideos;
TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), texelsScaledThisFrame_(0) {
TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), clutRenderAddress_(0), texelsScaledThisFrame_(0) {
timesInvalidatedAllThisFrame_ = 0;
lastBoundTexture = -1;
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
@ -801,10 +801,30 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat
}
void TextureCache::LoadClut(u32 clutAddr, u32 loadBytes) {
// Clear the uncached bit, etc. to match framebuffers.
clutAddr = clutAddr & 0x3FFFFFFF;
bool foundFramebuffer = false;
clutRenderAddress_ = 0;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
if ((framebuffer->fb_address | 0x04000000) == clutAddr) {
framebuffer->last_frame_clut = gpuStats.numFlips;
framebuffer->usageFlags |= FB_USAGE_CLUT;
foundFramebuffer = true;
WARN_LOG_REPORT_ONCE(clutrender, G3D, "Using rendered CLUT for texture decode at %08x (%dx%dx%d)", clutAddr, framebuffer->width, framebuffer->height, framebuffer->colorDepth);
clutRenderAddress_ = framebuffer->fb_address;
}
}
clutTotalBytes_ = loadBytes;
if (Memory::IsValidAddress(clutAddr)) {
// It's possible for a game to (successfully) access outside valid memory.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
if (foundFramebuffer && !g_Config.bDisableSlowFramebufEffects) {
gpu->PerformMemoryDownload(clutAddr, bytes);
}
#ifdef _M_SSE
int numBlocks = bytes / 16;
if (bytes == loadBytes) {
@ -1237,6 +1257,10 @@ void TextureCache::SetTexture(bool force) {
// Check for FBO - slow!
if (entry->framebuffer) {
if (match) {
if (hasClut && clutRenderAddress_ != 0) {
WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
}
SetTextureFramebuffer(entry, entry->framebuffer);
entry->lastFrame = gpuStats.numFlips;
return;
@ -1399,6 +1423,10 @@ void TextureCache::SetTexture(bool force) {
TexCacheEntry entryNew = {0};
cache[cachekey] = entryNew;
if (hasClut && clutRenderAddress_ != 0) {
WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
}
entry = &cache[cachekey];
if (g_Config.bTextureBackoffCache) {
entry->status = TexCacheEntry::STATUS_HASHING;

View file

@ -152,6 +152,7 @@ private:
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
bool clutAlphaLinear_;
u16 clutAlphaLinearColor_;
u32 clutRenderAddress_;
u32 lastBoundTexture;
float maxAnisotropyLevel;