diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index c6585f21f2..5e40a1e434 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -629,23 +629,17 @@ namespace MIPSInt // vsat0 changes -0.0 to +0.0, both retain NAN. case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0 case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1 - //case 16: d[i] = 1.0f / s[i]; break; //vrcp case 16: { d[i] = vfpu_rcp(s[i]); } break; //vrcp case 17: d[i] = USE_VFPU_SQRT ? vfpu_rsqrt(s[i]) : 1.0f / sqrtf(s[i]); break; //vrsq case 18: { d[i] = vfpu_sin(s[i]); } break; //vsin case 19: { d[i] = vfpu_cos(s[i]); } break; //vcos - //case 20: d[i] = powf(2.0f, s[i]); break; //vexp2 case 20: { d[i] = vfpu_exp2(s[i]); } break; //vexp2 - //case 21: d[i] = logf(s[i])/log(2.0f); break; //vlog2 case 21: { d[i] = vfpu_log2(s[i]); } break; //vlog2 case 22: d[i] = USE_VFPU_SQRT ? vfpu_sqrt(s[i]) : fabsf(sqrtf(s[i])); break; //vsqrt - //case 23: d[i] = (float)(asinf(s[i]) / M_PI_2); break; //vasin case 23: { d[i] = vfpu_asin(s[i]); } break; //vasin - //case 24: d[i] = -1.0f / s[i]; break; // vnrcp case 24: { d[i] = -vfpu_rcp(s[i]); } break; // vnrcp case 26: { d[i] = -vfpu_sin(s[i]); } break; // vnsin - //case 28: d[i] = 1.0f / powf(2.0, s[i]); break; // vrexp2 case 28: { d[i] = vfpu_rexp2(s[i]); } break; // vrexp2 default: _dbg_assert_msg_( false, "Invalid VV2Op op type %d", optype); diff --git a/Core/MIPS/MIPSVFPUUtils.cpp b/Core/MIPS/MIPSVFPUUtils.cpp index ca4f4c889b..d5d622f752 100644 --- a/Core/MIPS/MIPSVFPUUtils.cpp +++ b/Core/MIPS/MIPSVFPUUtils.cpp @@ -802,6 +802,7 @@ float vfpu_dot(const float a[4], const float b[4]) { // See https://github.com/hrydgard/ppsspp/issues/16946 for details. // Lookup tables. +// Note: these are never unloaded, and stay till program termination. static uint32_t (*vfpu_sin_lut8192)=nullptr; static int8_t (*vfpu_sin_lut_delta)[2]=nullptr; static int16_t (*vfpu_sin_lut_interval_delta)=nullptr; @@ -824,6 +825,30 @@ static uint16_t (*vfpu_asin_lut_indices)=nullptr; static int8_t (*vfpu_rcp_lut)[2]=nullptr; +template +static inline bool load_vfpu_table(T *&ptr,const char *filename, size_t expected_size) { +#if COMMON_BIG_ENDIAN + // Tables are little-endian. +#error Byteswap for VFPU tables not implemented +#endif + if(ptr) return true; // Already loaded. + size_t size = 0u; + INFO_LOG(CPU, "Loading '%s'...", filename); + ptr = reinterpret_cast(g_VFS.ReadFile(filename, &size)); + if(!ptr || size != expected_size) + { + ERROR_LOG(CPU, "Error loading '%s' (size=%u, expected: %u)", filename, (unsigned)size, (unsigned)expected_size); + if(ptr) delete[] ptr; + ptr = nullptr; + return false; + } + INFO_LOG(CPU, "Successfully loaded '%s'", filename); + return true; +} + +#define LOAD_TABLE(name, expected_size)\ + load_vfpu_table(name,"vfpu/" #name ".dat",expected_size) + // Note: PSP sin/cos output only has 22 significant // binary digits. static inline uint32_t vfpu_sin_quantum(uint32_t x) { @@ -877,6 +902,11 @@ static inline uint32_t vfpu_sin_fixed(uint32_t arg) { } float vfpu_sin(float x) { + static bool loaded = + LOAD_TABLE(vfpu_sin_lut8192, 4100)&& + LOAD_TABLE(vfpu_sin_lut_delta, 262144)&& + LOAD_TABLE(vfpu_sin_lut_interval_delta, 131074)&& + LOAD_TABLE(vfpu_sin_lut_exceptions, 86938); uint32_t bits; memcpy(&bits, &x, sizeof(x)); uint32_t sign = bits & 0x80000000u; @@ -909,6 +939,11 @@ float vfpu_sin(float x) { } float vfpu_cos(float x) { + static bool loaded = + LOAD_TABLE(vfpu_sin_lut8192, 4100)&& + LOAD_TABLE(vfpu_sin_lut_delta, 262144)&& + LOAD_TABLE(vfpu_sin_lut_interval_delta, 131074)&& + LOAD_TABLE(vfpu_sin_lut_exceptions, 86938); uint32_t bits; memcpy(&bits, &x, sizeof(x)); bits &= 0x7FFFFFFFu; @@ -1012,6 +1047,8 @@ static inline uint32_t vfpu_sqrt_fixed(uint32_t x) { } float vfpu_sqrt(float x) { + static bool loaded = + LOAD_TABLE(vfpu_sqrt_lut, 262144); uint32_t bits; memcpy(&bits, &x, sizeof(bits)); if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) { @@ -1096,6 +1133,8 @@ static inline uint32_t vfpu_rsqrt_fixed(uint32_t x) { } float vfpu_rsqrt(float x) { + static bool loaded = + LOAD_TABLE(vfpu_rsqrt_lut, 262144); uint32_t bits; memcpy(&bits, &x, sizeof(bits)); if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) { @@ -1153,6 +1192,10 @@ static uint32_t vfpu_asin_fixed(uint32_t x) { } float vfpu_asin(float x) { + static bool loaded = + LOAD_TABLE(vfpu_asin_lut65536, 1536)&& + LOAD_TABLE(vfpu_asin_lut_indices, 798916)&& + LOAD_TABLE(vfpu_asin_lut_deltas, 517448); uint32_t bits; memcpy(&bits, &x, sizeof(x)); uint32_t sign = bits & 0x80000000u; @@ -1190,6 +1233,9 @@ static inline uint32_t vfpu_exp2_fixed(uint32_t x) { } float vfpu_exp2(float x) { + static bool loaded = + LOAD_TABLE(vfpu_exp2_lut65536, 512)&& + LOAD_TABLE(vfpu_exp2_lut, 262144); int32_t bits; memcpy(&bits, &x, sizeof(bits)); if((bits & 0x7FFFFFFF) <= 0x007FFFFF) { @@ -1239,6 +1285,10 @@ static inline uint32_t vfpu_log2_approx(uint32_t x) { // Matches PSP output on all known values. float vfpu_log2(float x) { + static bool loaded = + LOAD_TABLE(vfpu_log2_lut65536, 516)&& + LOAD_TABLE(vfpu_log2_lut65536_quadratic, 512)&& + LOAD_TABLE(vfpu_log2_lut, 2097152); uint32_t bits; memcpy(&bits, &x, sizeof(bits)); if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) { @@ -1289,6 +1339,8 @@ static inline uint32_t vfpu_rcp_approx(uint32_t i) { } float vfpu_rcp(float x) { + static bool loaded = + LOAD_TABLE(vfpu_rcp_lut, 262144); uint32_t bits; memcpy(&bits, &x, sizeof(bits)); uint32_t s = bits & 0x80000000u; @@ -1318,35 +1370,22 @@ float vfpu_rcp(float x) { //============================================================================== void InitVFPU() { -#if COMMON_BIG_ENDIAN - // Tables are little-endian. -#error Byteswap for VFPU tables not implemented +#if 0 + // Load all in advance. + LOAD_TABLE(vfpu_asin_lut65536 , 1536); + LOAD_TABLE(vfpu_asin_lut_deltas , 517448); + LOAD_TABLE(vfpu_asin_lut_indices , 798916); + LOAD_TABLE(vfpu_exp2_lut65536 , 512); + LOAD_TABLE(vfpu_exp2_lut , 262144); + LOAD_TABLE(vfpu_log2_lut65536 , 516); + LOAD_TABLE(vfpu_log2_lut65536_quadratic, 512); + LOAD_TABLE(vfpu_log2_lut , 2097152); + LOAD_TABLE(vfpu_rcp_lut , 262144); + LOAD_TABLE(vfpu_rsqrt_lut , 262144); + LOAD_TABLE(vfpu_sin_lut8192 , 4100); + LOAD_TABLE(vfpu_sin_lut_delta , 262144); + LOAD_TABLE(vfpu_sin_lut_exceptions , 86938); + LOAD_TABLE(vfpu_sin_lut_interval_delta , 131074); + LOAD_TABLE(vfpu_sqrt_lut , 262144); #endif - size_t size=0; -#define LOAD(expected,name)\ - if(!name) {\ - const char *filename = "vfpu/" #name ".dat";\ - INFO_LOG(CPU, "Loading '%s'...", filename);\ - name=reinterpret_cast(g_VFS.ReadFile(filename, &size));\ - if(size!=(expected))\ - ERROR_LOG(CPU, "Error loading '%s' (size=%u, expected: %u)", filename, (unsigned)size, (unsigned)(expected));\ - else\ - INFO_LOG(CPU, "Successfully loaded '%s'", filename);\ - } - LOAD( 1536, vfpu_asin_lut65536); - LOAD( 517448, vfpu_asin_lut_deltas); - LOAD( 798916, vfpu_asin_lut_indices); - LOAD( 512, vfpu_exp2_lut65536); - LOAD( 262144, vfpu_exp2_lut); - LOAD( 516, vfpu_log2_lut65536); - LOAD( 512, vfpu_log2_lut65536_quadratic); - LOAD( 2097152, vfpu_log2_lut); - LOAD( 262144, vfpu_rcp_lut); - LOAD( 262144, vfpu_rsqrt_lut); - LOAD( 4100, vfpu_sin_lut8192); - LOAD( 262144, vfpu_sin_lut_delta); - LOAD( 86938, vfpu_sin_lut_exceptions); - LOAD( 131074, vfpu_sin_lut_interval_delta); - LOAD( 262144, vfpu_sqrt_lut); -#undef LOAD } diff --git a/Core/MIPS/MIPSVFPUUtils.h b/Core/MIPS/MIPSVFPUUtils.h index a8b2bebd79..1b03ca390a 100644 --- a/Core/MIPS/MIPSVFPUUtils.h +++ b/Core/MIPS/MIPSVFPUUtils.h @@ -42,6 +42,7 @@ inline int Xpose(int v) { // Final Fantasy III (#2921 ) // Hitman Reborn 2 (#12900) // Cho Aniki Zero (#13705) +// Hajime no Ippo (#13671) // Dissidia Duodecim Final Fantasy (#6710 ) // // Messing around with the modulo functions? try https://www.desmos.com/calculator. @@ -50,11 +51,6 @@ extern float vfpu_sin(float); extern float vfpu_cos(float); extern void vfpu_sincos(float, float&, float&); -/* -inline float vfpu_asin(float angle) { - return (float)(asinf(angle) / M_PI_2); -} -*/ extern float vfpu_asin(float); inline float vfpu_clamp(float v, float min, float max) {