From e374ea6b21e5117c8410c18f5dcceaf3e6f73fc7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Aug 2022 17:32:45 -0700 Subject: [PATCH 1/3] GPU: Restrict mip CLUT enhancement a bit. Can't replicate this behavior on a real PSP. In case a game sets the separate CLUT flag by accident, ignore except in a safe case that occurs in Misshitsu no Sacrifice. See #15727. --- GPU/Common/TextureCacheCommon.cpp | 4 +++- GPU/GPUState.h | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 014b8feb3a..af642a7df1 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1666,7 +1666,9 @@ CheckAlphaResult TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int l texptr = (u8 *)tmpTexBuf32_.data(); } - const bool mipmapShareClut = gstate.isClutSharedForMipmaps(); + // Misshitsu no Sacrifice has separate CLUT data, this is a hack to allow it. + // Normally separate CLUTs are not allowed for 8-bit or higher indices, so only honor the flag for a 0x40 block load. + const bool mipmapShareClut = gstate.isClutSharedForMipmaps() || gstate.getClutLoadBlocks() != 0x40; const int clutSharingOffset = mipmapShareClut ? 0 : (level & 1) * 256; GEPaletteFormat palFormat = (GEPaletteFormat)gstate.getClutPaletteFormat(); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 6aaa79a8fd..ae1c2c6510 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -300,8 +300,14 @@ struct GPUgstate { bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.
int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; } u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); } - int getClutLoadBytes() const { return (loadclut & 0x7F) * 32; } - int getClutLoadBlocks() const { return (loadclut & 0x7F); } + int getClutLoadBytes() const { return getClutLoadBlocks() * 32; } + int getClutLoadBlocks() const { + // The PSP only supports 0x3F, but Misshitsu no Sacrifice has extra color data (see #15727.) + // 0x40 would be 0, which would be a no-op, so we allow it. + if ((loadclut & 0x7F) == 0x40) + return 0x40; + return loadclut & 0x3F; + } GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); } int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; } int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; } From 229c9442088fc8dc7e84c1448653337e2c3facee Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Aug 2022 17:35:06 -0700 Subject: [PATCH 2/3] GE Debugger: Include extended CLUT in frame dumps. Not available on a real PSP, but used in an enhancement (see #15727.) --- GPU/Debugger/Record.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp index 0a15fb8f43..907c7b82c4 100644 --- a/GPU/Debugger/Record.cpp +++ b/GPU/Debugger/Record.cpp @@ -454,7 +454,9 @@ static void EmitTransfer(u32 op) { static void EmitClut(u32 op) { u32 addr = gstate.getClutAddress(); - u32 bytes = (op & 0x3F) * 32; + // Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727. + u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F); + u32 bytes = blocks * 32; bytes = Memory::ValidSize(addr, bytes); if (bytes != 0) { From 3d52b445f17adb74e273f04c9016b5a0dbb8c949 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Aug 2022 17:36:15 -0700 Subject: [PATCH 3/3] softgpu: Restrict CLUT to proper size. We had 15 KB more space than needed before.
--- GPU/Software/Rasterizer.cpp | 2 +- GPU/Software/SoftGpu.cpp | 7 +++++-- GPU/Software/SoftGpu.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 36cd871ecb..48e51cd9b2 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1408,7 +1408,7 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level) SamplerID id; ComputeSamplerID(&id); - id.cached.clut = (const u8 *)clut; + id.cached.clut = clut; Sampler::FetchFunc sampler = Sampler::GetFetchFunc(id); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 8015a9db05..117faced52 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -52,7 +52,7 @@ const int FB_WIDTH = 480; const int FB_HEIGHT = 272; -u32 clut[4096]; +uint8_t clut[1024]; FormatBuffer fb; FormatBuffer depthbuf; @@ -971,7 +971,10 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) { void SoftGPU::Execute_LoadClut(u32 op, u32 diff) { u32 clutAddr = gstate.getClutAddress(); - u32 clutTotalBytes = gstate.getClutLoadBytes(); + // Avoid the hack in getClutLoadBytes() to inaccurately allow more palette data. + u32 clutTotalBytes = (gstate.getClutLoadBlocks() & 0x3F) * 32; + if (clutTotalBytes > 1024) + clutTotalBytes = 1024; // Might be copying drawing into the CLUT, so flush. drawEngine_->transformUnit.FlushIfOverlap("loadclut", clutAddr, clutTotalBytes, clutTotalBytes, 1); diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 712ec6bbdd..11d5dd16f3 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -216,7 +216,7 @@ private: }; // TODO: These shouldn't be global. -extern u32 clut[4096]; +extern uint8_t clut[1024]; extern FormatBuffer fb; extern FormatBuffer depthbuf;