Implement load-on-demand of vfpu tables

2025-04-02 11:01:50 -04:00 · 2023-03-12 05:11:59 -04:00 · 2023-03-12 05:11:59 -04:00 · 38fc21a2c0
commit 38fc21a2c0
parent c49c45a106
3 changed files with 70 additions and 41 deletions
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@ -629,23 +629,17 @@ namespace MIPSInt
 			// vsat0 changes -0.0 to +0.0, both retain NAN.
 			case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
 			case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
-			//case 16: d[i] = 1.0f / s[i]; break; //vrcp
 			case 16: { d[i] = vfpu_rcp(s[i]); } break; //vrcp
 			case 17: d[i] = USE_VFPU_SQRT ? vfpu_rsqrt(s[i]) : 1.0f / sqrtf(s[i]); break; //vrsq
 				
 			case 18: { d[i] = vfpu_sin(s[i]); } break; //vsin
 			case 19: { d[i] = vfpu_cos(s[i]); } break; //vcos
-			//case 20: d[i] = powf(2.0f, s[i]); break; //vexp2
 			case 20: { d[i] = vfpu_exp2(s[i]); } break; //vexp2
-			//case 21: d[i] = logf(s[i])/log(2.0f); break; //vlog2
 			case 21: { d[i] = vfpu_log2(s[i]); } break; //vlog2
 			case 22: d[i] = USE_VFPU_SQRT ? vfpu_sqrt(s[i])  : fabsf(sqrtf(s[i])); break; //vsqrt
-			//case 23: d[i] = (float)(asinf(s[i]) / M_PI_2); break; //vasin
 			case 23: { d[i] = vfpu_asin(s[i]); } break; //vasin
-			//case 24: d[i] = -1.0f / s[i]; break; // vnrcp
 			case 24: { d[i] = -vfpu_rcp(s[i]); } break; // vnrcp
 			case 26: { d[i] = -vfpu_sin(s[i]); } break; // vnsin
-			//case 28: d[i] = 1.0f / powf(2.0, s[i]); break; // vrexp2
 			case 28: { d[i] = vfpu_rexp2(s[i]); } break; // vrexp2
 			default:
 				_dbg_assert_msg_( false, "Invalid VV2Op op type %d", optype);
--- a/Core/MIPS/MIPSVFPUUtils.cpp
+++ b/Core/MIPS/MIPSVFPUUtils.cpp
@ -802,6 +802,7 @@ float vfpu_dot(const float a[4], const float b[4]) {
 // See https://github.com/hrydgard/ppsspp/issues/16946 for details.

 // Lookup tables.
+// Note: once loaded, these are never unloaded; they stay in memory until program termination.
 static uint32_t (*vfpu_sin_lut8192)=nullptr;
 static  int8_t  (*vfpu_sin_lut_delta)[2]=nullptr;
 static  int16_t (*vfpu_sin_lut_interval_delta)=nullptr;
@ -824,6 +825,30 @@ static uint16_t (*vfpu_asin_lut_indices)=nullptr;

 static  int8_t  (*vfpu_rcp_lut)[2]=nullptr;

+// Loads `filename` into ptr on first use (no-op if already loaded); on a failed read or size mismatch ptr stays null and false is returned.
+template<typename T>
+static inline bool load_vfpu_table(T *&ptr,const char *filename, size_t expected_size) {
+#if COMMON_BIG_ENDIAN
+	// Tables are little-endian.
+#error Byteswap for VFPU tables not implemented
+#endif
+	if(ptr) return true; // Already loaded.
+	size_t size = 0u;
+	INFO_LOG(CPU, "Loading '%s'...", filename);
+	auto *data = g_VFS.ReadFile(filename, &size); // Keep the buffer's own pointer type until it is validated.
+	if(!data || size != expected_size) {
+		ERROR_LOG(CPU, "Error loading '%s' (size=%u, expected: %u)", filename, (unsigned)size, (unsigned)expected_size);
+		delete[] data; // Release via the type ReadFile returned: delete[] through a reinterpret_cast'ed T* is undefined behaviour.
+		return false;
+	}
+	ptr = reinterpret_cast<T *>(data); // Publish only a fully validated table.
+	INFO_LOG(CPU, "Successfully loaded '%s'", filename);
+	return true;
+}
+
+#define LOAD_TABLE(name, expected_size)\
+	load_vfpu_table(name,"vfpu/" #name ".dat",expected_size) // File path is "vfpu/<name>.dat", built by stringizing the table variable's name.
+
 // Note: PSP sin/cos output only has 22 significant
 // binary digits.
 static inline uint32_t vfpu_sin_quantum(uint32_t x) {
@ -877,6 +902,11 @@ static inline uint32_t vfpu_sin_fixed(uint32_t arg) {
 }

 float vfpu_sin(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_sin_lut8192,              4100)&&
+		LOAD_TABLE(vfpu_sin_lut_delta,          262144)&&
+		LOAD_TABLE(vfpu_sin_lut_interval_delta, 131074)&&
+		LOAD_TABLE(vfpu_sin_lut_exceptions,      86938);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(x));
 	uint32_t sign = bits & 0x80000000u;
@ -909,6 +939,11 @@ float vfpu_sin(float x) {
 }

 float vfpu_cos(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_sin_lut8192,              4100)&&
+		LOAD_TABLE(vfpu_sin_lut_delta,          262144)&&
+		LOAD_TABLE(vfpu_sin_lut_interval_delta, 131074)&&
+		LOAD_TABLE(vfpu_sin_lut_exceptions,      86938);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(x));
 	bits &= 0x7FFFFFFFu;
@ -1012,6 +1047,8 @@ static inline uint32_t vfpu_sqrt_fixed(uint32_t x) {
 }

 float vfpu_sqrt(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_sqrt_lut, 262144);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(bits));
 	if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) {
@ -1096,6 +1133,8 @@ static inline uint32_t vfpu_rsqrt_fixed(uint32_t x) {
 }

 float vfpu_rsqrt(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_rsqrt_lut, 262144);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(bits));
 	if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) {
@ -1153,6 +1192,10 @@ static uint32_t vfpu_asin_fixed(uint32_t x) {
 }

 float vfpu_asin(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_asin_lut65536,      1536)&&
+		LOAD_TABLE(vfpu_asin_lut_indices, 798916)&&
+		LOAD_TABLE(vfpu_asin_lut_deltas,  517448);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(x));
 	uint32_t sign = bits & 0x80000000u;
@ -1190,6 +1233,9 @@ static inline uint32_t vfpu_exp2_fixed(uint32_t x) {
 }

 float vfpu_exp2(float x) {
+	static bool loaded =
+	        LOAD_TABLE(vfpu_exp2_lut65536,    512)&&
+		LOAD_TABLE(vfpu_exp2_lut,      262144);
 	int32_t bits;
 	memcpy(&bits, &x, sizeof(bits));
 	if((bits & 0x7FFFFFFF) <= 0x007FFFFF) {
@ -1239,6 +1285,10 @@ static inline uint32_t vfpu_log2_approx(uint32_t x) {

 // Matches PSP output on all known values.
 float vfpu_log2(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_log2_lut65536,               516)&&
+		LOAD_TABLE(vfpu_log2_lut65536_quadratic,     512)&&
+		LOAD_TABLE(vfpu_log2_lut,                2097152);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(bits));
 	if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) {
@ -1289,6 +1339,8 @@ static inline uint32_t vfpu_rcp_approx(uint32_t i) {
 }

 float vfpu_rcp(float x) {
+	static bool loaded =
+		LOAD_TABLE(vfpu_rcp_lut, 262144);
 	uint32_t bits;
 	memcpy(&bits, &x, sizeof(bits));
 	uint32_t s = bits & 0x80000000u;
@ -1318,35 +1370,22 @@ float vfpu_rcp(float x) {
 //==============================================================================

 void InitVFPU() {
-#if COMMON_BIG_ENDIAN
-	// Tables are little-endian.
-#error Byteswap for VFPU tables not implemented
+#if 0
+	// Disabled: each vfpu_* function now loads its own tables on first call. Change to #if 1 to load all in advance.
+	LOAD_TABLE(vfpu_asin_lut65536          ,    1536); 
+	LOAD_TABLE(vfpu_asin_lut_deltas        ,  517448); 
+	LOAD_TABLE(vfpu_asin_lut_indices       ,  798916); 
+	LOAD_TABLE(vfpu_exp2_lut65536          ,     512); 
+	LOAD_TABLE(vfpu_exp2_lut               ,  262144); 
+	LOAD_TABLE(vfpu_log2_lut65536          ,     516); 
+	LOAD_TABLE(vfpu_log2_lut65536_quadratic,     512); 
+	LOAD_TABLE(vfpu_log2_lut               , 2097152); 
+	LOAD_TABLE(vfpu_rcp_lut                ,  262144); 
+	LOAD_TABLE(vfpu_rsqrt_lut              ,  262144); 
+	LOAD_TABLE(vfpu_sin_lut8192            ,    4100); 
+	LOAD_TABLE(vfpu_sin_lut_delta          ,  262144); 
+	LOAD_TABLE(vfpu_sin_lut_exceptions     ,   86938); 
+	LOAD_TABLE(vfpu_sin_lut_interval_delta ,  131074); 
+	LOAD_TABLE(vfpu_sqrt_lut               ,  262144); 
 #endif
-	size_t size=0;
-#define LOAD(expected,name)\
-	if(!name) {\
-		const char *filename = "vfpu/" #name ".dat";\
-		INFO_LOG(CPU, "Loading '%s'...", filename);\
-		name=reinterpret_cast<decltype(name)>(g_VFS.ReadFile(filename, &size));\
-		if(size!=(expected))\
-			ERROR_LOG(CPU, "Error loading '%s' (size=%u, expected: %u)", filename, (unsigned)size, (unsigned)(expected));\
-		else\
-			INFO_LOG(CPU, "Successfully loaded '%s'", filename);\
-	}
-	LOAD(    1536, vfpu_asin_lut65536);
-	LOAD(  517448, vfpu_asin_lut_deltas);
-	LOAD(  798916, vfpu_asin_lut_indices);
-	LOAD(     512, vfpu_exp2_lut65536);
-	LOAD(  262144, vfpu_exp2_lut);
-	LOAD(     516, vfpu_log2_lut65536);
-	LOAD(     512, vfpu_log2_lut65536_quadratic);
-	LOAD( 2097152, vfpu_log2_lut);
-	LOAD(  262144, vfpu_rcp_lut);
-	LOAD(  262144, vfpu_rsqrt_lut);
-	LOAD(    4100, vfpu_sin_lut8192);
-	LOAD(  262144, vfpu_sin_lut_delta);
-	LOAD(   86938, vfpu_sin_lut_exceptions);
-	LOAD(  131074, vfpu_sin_lut_interval_delta);
-	LOAD(  262144, vfpu_sqrt_lut);
-#undef LOAD
 }
--- a/Core/MIPS/MIPSVFPUUtils.h
+++ b/Core/MIPS/MIPSVFPUUtils.h
@ -42,6 +42,7 @@ inline int Xpose(int v) {
 //     Final Fantasy III               (#2921 )
 //     Hitman Reborn 2                 (#12900)
 //     Cho Aniki Zero                  (#13705)
+//     Hajime no Ippo                  (#13671) 
 //     Dissidia Duodecim Final Fantasy (#6710 )
 //
 // Messing around with the modulo functions? try https://www.desmos.com/calculator.
@ -50,11 +51,6 @@ extern float vfpu_sin(float);
 extern float vfpu_cos(float);
 extern void vfpu_sincos(float, float&, float&);

-/*
-inline float vfpu_asin(float angle) {
-	return (float)(asinf(angle) / M_PI_2);
-}
-*/
 extern float vfpu_asin(float);

 inline float vfpu_clamp(float v, float min, float max) {