Workaround for some SOCOM games' misuse of CLUT8 textures.

Emulating this correctly would be possible too but would only work at 1x rendering resolution.
2025-04-02 11:01:50 -04:00 · 2023-04-17 09:34:26 +02:00 · 2023-04-17 09:34:26 +02:00 · 9a3ff69091
commit 9a3ff69091
parent 1143661dec
9 changed files with 76 additions and 25 deletions
--- a/Core/Compatibility.cpp
+++ b/Core/Compatibility.cpp
@ -130,6 +130,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
 	CheckSetting(iniFile, gameID, "BlockTransferDepth", &flags_.BlockTransferDepth);
 	CheckSetting(iniFile, gameID, "DaxterRotatedAnalogStick", &flags_.DaxterRotatedAnalogStick);
 	CheckSetting(iniFile, gameID, "ForceMaxDepthResolution", &flags_.ForceMaxDepthResolution);
+	CheckSetting(iniFile, gameID, "SOCOMClut8Replacement", &flags_.SOCOMClut8Replacement);
 }

 void Compatibility::CheckVRSettings(IniFile &iniFile, const std::string &gameID) {
--- a/Core/Compatibility.h
+++ b/Core/Compatibility.h
@ -100,6 +100,7 @@ struct CompatFlags {
 	bool BlockTransferDepth;
 	bool DaxterRotatedAnalogStick;
 	bool ForceMaxDepthResolution;
+	bool SOCOMClut8Replacement;
 };

 struct VRCompat {
--- a/GPU/Common/DepalettizeShaderCommon.cpp
+++ b/GPU/Common/DepalettizeShaderCommon.cpp
@ -111,6 +111,13 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
 		if (shiftedMask & 0x7C00) writer.C("  int b = int(color.b * 31.99);\n"); else writer.C("  int b = 0;\n");
 		if (shiftedMask & 0x8000) writer.C("  int a = int(color.a);\n"); else writer.C("  int a = 0;\n");
 		writer.C("  int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
+
+		if (config.textureFormat == GE_TFMT_CLUT8) {
+			// SOCOM case. #16210
+			// To debug the issue, remove this shift to see the texture (check for clamping etc).
+			writer.C("  index >>= 8;\n");
+		}
+
 		break;
 	case GE_FORMAT_DEPTH16:
 		// Decode depth buffer.
--- a/GPU/Common/Draw2D.cpp
+++ b/GPU/Common/Draw2D.cpp
@ -261,7 +261,7 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::function<Draw2DPipelineInfo (Shade

 	ShaderModule *fs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), info.tag);

-	_assert_(fs);
+	_assert_msg_(fs, "Failed to create shader module!\n%s", fsCode);

 	// verts have positions in 2D clip coordinates.
 	static const InputLayoutDesc desc = {
--- a/GPU/Common/TextureCacheCommon.cpp
+++ b/GPU/Common/TextureCacheCommon.cpp
@ -1028,18 +1028,28 @@ bool TextureCacheCommon::MatchFramebuffer(
 		}

 		// Check works for D16 too.
+		// These are the combinations that we have special-cased handling for. More are
+		// possible, but rare - we'll add them as we find them used.
 		const bool matchingClutFormat =
 			(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) ||
 			(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) ||
 			(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
 			(fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16) ||
-			(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8);
+			(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8) ||
+			(fb_format == GE_FORMAT_5551 && entry.format == GE_TFMT_CLUT8 && PSP_CoreParameter().compat.flags().SOCOMClut8Replacement);

-		const int texBitsPerPixel = std::max(1U, (u32)textureBitsPerPixel[entry.format]);
+		const int texBitsPerPixel = TextureFormatBitsPerPixel(entry.format);
 		const int byteOffset = texaddr - addr;
 		if (byteOffset > 0) {
+			int texbpp = texBitsPerPixel;
+			if (fb_format == GE_FORMAT_5551 && entry.format == GE_TFMT_CLUT8) {
+				// In this case we treat CLUT8 as if it were CLUT16, see issue #16210. So we need
+				// to compute the x offset appropriately.
+				texbpp = 16;
+			}
+
 			matchInfo->yOffset = byteOffset / fb_stride_in_bytes;
-			matchInfo->xOffset = 8 * (byteOffset % fb_stride_in_bytes) / texBitsPerPixel;
+			matchInfo->xOffset = 8 * (byteOffset % fb_stride_in_bytes) / texbpp;
 		} else if (byteOffset < 0) {
 			int texelOffset = 8 * byteOffset / texBitsPerPixel;
 			// We don't support negative Y offsets, and negative X offsets are only for the Killzone workaround.
@ -1066,7 +1076,7 @@ bool TextureCacheCommon::MatchFramebuffer(
 		// Trying to play it safe.  Below 0x04110000 is almost always framebuffers.
 		// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
 		if (matchInfo->yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000 && !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
-			WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset, framebuffer->width, framebuffer->height);
+			WARN_LOG_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset, framebuffer->width, framebuffer->height);
 			return false;
 		}

@ -1133,6 +1143,11 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
 		gstate_c.curTextureWidth = framebuffer->bufferWidth;
 		gstate_c.curTextureHeight = framebuffer->bufferHeight;

+		if (candidate.channel == RASTER_COLOR && gstate.getTextureFormat() == GE_TFMT_CLUT8 && framebuffer->fb_format == GE_FORMAT_5551 && PSP_CoreParameter().compat.flags().SOCOMClut8Replacement) {
+			// See #16210. UV must be adjusted as if the texture was twice the width.
+			gstate_c.curTextureWidth *= 2.0f;
+		}
+
 		if (needsDepthXSwizzle) {
 			gstate_c.curTextureWidth = RoundUpToPowerOf2(gstate_c.curTextureWidth);
 		}
@ -2145,6 +2160,7 @@ void TextureCacheCommon::ApplyTexture() {
 	}
 }

+// Can we depalettize at all? This refers to both in-fragment-shader depal and "traditional" depal through a separate pass.
 static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
 	if (IsClutFormat(texFormat)) {
 		switch (bufferFormat) {
@ -2155,6 +2171,10 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma
 			if (texFormat == GE_TFMT_CLUT16) {
 				return true;
 			}
+			if (texFormat == GE_TFMT_CLUT8 && bufferFormat == GE_FORMAT_5551 && PSP_CoreParameter().compat.flags().SOCOMClut8Replacement) {
+				// Wacky case from issue #16210 (SOCOM etc).
+				return true;
+			}
 			break;
 		case GE_FORMAT_8888:
 			if (texFormat == GE_TFMT_CLUT32 || texFormat == GE_TFMT_CLUT8) {  // clut8 takes a special depal mode.
@ -2214,7 +2234,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
 	bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer &&
 		!depth && clutRenderAddress_ == 0xFFFFFFFF &&
 		!gstate_c.curTextureIs3D &&
-		draw_->GetShaderLanguageDesc().bitwiseOps;
+		draw_->GetShaderLanguageDesc().bitwiseOps &&
+		!(texFormat == GE_TFMT_CLUT8 && framebuffer->fb_format == GE_FORMAT_5551);  // socom

 	switch (draw_->GetShaderLanguageDesc().shaderLanguage) {
 	case ShaderLanguage::HLSL_D3D9:
--- a/GPU/Common/TextureDecoder.cpp
+++ b/GPU/Common/TextureDecoder.cpp
@ -48,6 +48,25 @@
 #define DO_NOT_VECTORIZE_LOOP
 #endif

+const u8 textureBitsPerPixel[16] = {
+	16,  //GE_TFMT_5650,
+	16,  //GE_TFMT_5551,
+	16,  //GE_TFMT_4444,
+	32,  //GE_TFMT_8888,
+	4,   //GE_TFMT_CLUT4,
+	8,   //GE_TFMT_CLUT8,
+	16,  //GE_TFMT_CLUT16,
+	32,  //GE_TFMT_CLUT32,
+	4,   //GE_TFMT_DXT1,
+	8,   //GE_TFMT_DXT3,
+	8,   //GE_TFMT_DXT5,
+	0,   // INVALID,
+	0,   // INVALID,
+	0,   // INVALID,
+	0,   // INVALID,
+	0,   // INVALID,
+};
+
 #ifdef _M_SSE

 static u32 QuickTexHashSSE2(const void *checkp, u32 size) {
--- a/GPU/Common/TextureDecoder.h
+++ b/GPU/Common/TextureDecoder.h
@ -73,27 +73,16 @@ uint32_t GetDXT1Texel(const DXT1Block *src, int x, int y);
 uint32_t GetDXT3Texel(const DXT3Block *src, int x, int y);
 uint32_t GetDXT5Texel(const DXT5Block *src, int x, int y);

-static const u8 textureBitsPerPixel[16] = {
-	16,  //GE_TFMT_5650,
-	16,  //GE_TFMT_5551,
-	16,  //GE_TFMT_4444,
-	32,  //GE_TFMT_8888,
-	4,   //GE_TFMT_CLUT4,
-	8,   //GE_TFMT_CLUT8,
-	16,  //GE_TFMT_CLUT16,
-	32,  //GE_TFMT_CLUT32,
-	4,   //GE_TFMT_DXT1,
-	8,   //GE_TFMT_DXT3,
-	8,   //GE_TFMT_DXT5,
-	0,   // INVALID,
-	0,   // INVALID,
-	0,   // INVALID,
-	0,   // INVALID,
-	0,   // INVALID,
-};
+extern const u8 textureBitsPerPixel[16];

 u32 GetTextureBufw(int level, u32 texaddr, GETextureFormat format);

+// WARNING: Returns bits, not bytes. This is needed due to the presence of 4-bit formats.
+inline u32 TextureFormatBitsPerPixel(GETextureFormat format) {
+	u32 bits = textureBitsPerPixel[(int)format];
+	return bits != 0 ? bits : 1;  // Best to return 1 here to survive divisions in case of invalid data.
+}
+
 inline bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) {
 	return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask;
 }
--- a/GPU/GLES/ShaderManagerGLES.h
+++ b/GPU/GLES/ShaderManagerGLES.h
@ -209,7 +209,7 @@ private:
 	GLRenderManager *render_;
 	LinkedShaderCache linkedShaderCache_;

-	bool lastVShaderSame_;
+	bool lastVShaderSame_ = false;

 	FShaderID lastFSID_;
 	VShaderID lastVSID_;
--- a/assets/compat.ini
+++ b/assets/compat.ini
@ -1555,3 +1555,16 @@ UCET00844 = true
 UCUS98705 = true
 UCED00971 = true
 UCUS98713 = true
+
+[SOCOMClut8Replacement]
+# SOCOM and other games use CLUT8 with crafty sampling as if it was CLUT16. Issue #16210
+UCES00855 = true
+UCUS98649 = true
+NPUG70003 = true  # demo
+UCUS98714 = true  # demo
+
+# SOCOM Fireteam Bravo 3
+UCES01242 = true
+NPHG00032 = true
+UCUS98716 = true
+NPEG90024 = true  # demo