softjit: Implement accurate fog color blending.

2025-04-02 11:01:50 -04:00 · 2022-09-11 08:50:07 -07:00 · 2022-09-11 08:50:07 -07:00 · e72309745e
commit e72309745e
parent b90fc7137f
3 changed files with 12 additions and 18 deletions
--- a/GPU/Software/DrawPixel.cpp
+++ b/GPU/Software/DrawPixel.cpp
@ -550,8 +550,6 @@ void PixelJitCache::Clear() {

 	constBlendHalf_11_4s_ = nullptr;
 	constBlendInvert_11_4s_ = nullptr;
-	const255_16s_ = nullptr;
-	constBy255i_ = nullptr;
 }

 std::string PixelJitCache::DescribeCodePtr(const u8 *ptr) {
--- a/GPU/Software/DrawPixel.h
+++ b/GPU/Software/DrawPixel.h
@ -106,8 +106,6 @@ private:

 	const u8 *constBlendHalf_11_4s_ = nullptr;
 	const u8 *constBlendInvert_11_4s_ = nullptr;
-	const u8 *const255_16s_ = nullptr;
-	const u8 *constBy255i_ = nullptr;

 #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
 	void Discard();
--- a/GPU/Software/DrawPixelX86.cpp
+++ b/GPU/Software/DrawPixelX86.cpp
@ -336,12 +336,6 @@ void PixelJitCache::WriteConstantPool(const PixelFuncID &id) {

 	// This is used for shifted blend factors, to inverse them.
 	WriteSimpleConst8x16(constBlendInvert_11_4s_, 0xFF << 4);
-
-	// A set of 255s, used to inverse fog.
-	WriteSimpleConst8x16(const255_16s_, 0xFF);
-
-	// This is used for a multiply that divides by 255 with shifting.
-	WriteSimpleConst8x16(constBy255i_, 0x8081);
 }

 bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
@ -535,7 +529,8 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {

 	// Load a set of 255s at 16 bit into a reg for later...
 	X64Reg invertReg = regCache_.Alloc(RegCache::VEC_TEMP2);
-	MOVDQA(invertReg, M(const255_16s_));
+	PCMPEQW(invertReg, R(invertReg));
+	PSRLW(invertReg, 8);

 	// Expand (we clamped) color to 16 bit as well, so we can multiply with fog.
 	X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
@ -568,21 +563,24 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
 	// We can free up the actual fog reg now.
 	regCache_.ForceRelease(RegCache::GEN_ARG_FOG);

+	// Our goal here is to calculate this formula:
+	// (argColor * fog + fogColor * (255 - fog) + 255) / 256
+
 	// Now we multiply the existing color by fog...
 	PMULLW(argColorReg, R(fogMultReg));
-	// And then inverse the fog value using those 255s we loaded, and multiply by fog color.
-	PSUBUSW(invertReg, R(fogMultReg));
+	// Before inverting, let's add those 255s in as well, since we already have them in a register.
+	PADDW(argColorReg, R(invertReg));
+	// And then invert the fog value using those 255s, and multiply by fog color.
+	PSUBW(invertReg, R(fogMultReg));
 	PMULLW(fogColorReg, R(invertReg));
 	// At this point, argColorReg and fogColorReg are multiplied at 16-bit, so we need to sum.
-	PADDUSW(argColorReg, R(fogColorReg));
+	PADDW(argColorReg, R(fogColorReg));
 	regCache_.Release(fogColorReg, RegCache::VEC_TEMP1);
 	regCache_.Release(invertReg, RegCache::VEC_TEMP2);
 	regCache_.Release(fogMultReg, RegCache::VEC_TEMP3);

-	// Now to divide by 255, we use bit tricks: multiply by 0x8081, and shift right by 16+7.
-	PMULHUW(argColorReg, M(constBy255i_));
-	// Now shift right by 7 (PMULHUW already did 16 of the shift.)
-	PSRLW(argColorReg, 7);
+	// Now we simply divide by 256, or in other words shift right by 8.
+	PSRLW(argColorReg, 8);

 	// Okay, put A back in, we'll shrink it to 8888 when needed.
 	PINSRW(argColorReg, R(alphaReg), 3);