Implement load-on-demand of vfpu tables

This commit is contained in:
fp64 2023-03-12 05:11:59 -04:00
parent c49c45a106
commit 38fc21a2c0
3 changed files with 70 additions and 41 deletions

View file

@ -629,23 +629,17 @@ namespace MIPSInt
// vsat0 changes -0.0 to +0.0, both retain NAN.
case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
//case 16: d[i] = 1.0f / s[i]; break; //vrcp
case 16: { d[i] = vfpu_rcp(s[i]); } break; //vrcp
case 17: d[i] = USE_VFPU_SQRT ? vfpu_rsqrt(s[i]) : 1.0f / sqrtf(s[i]); break; //vrsq
case 18: { d[i] = vfpu_sin(s[i]); } break; //vsin
case 19: { d[i] = vfpu_cos(s[i]); } break; //vcos
//case 20: d[i] = powf(2.0f, s[i]); break; //vexp2
case 20: { d[i] = vfpu_exp2(s[i]); } break; //vexp2
//case 21: d[i] = logf(s[i])/log(2.0f); break; //vlog2
case 21: { d[i] = vfpu_log2(s[i]); } break; //vlog2
case 22: d[i] = USE_VFPU_SQRT ? vfpu_sqrt(s[i]) : fabsf(sqrtf(s[i])); break; //vsqrt
//case 23: d[i] = (float)(asinf(s[i]) / M_PI_2); break; //vasin
case 23: { d[i] = vfpu_asin(s[i]); } break; //vasin
//case 24: d[i] = -1.0f / s[i]; break; // vnrcp
case 24: { d[i] = -vfpu_rcp(s[i]); } break; // vnrcp
case 26: { d[i] = -vfpu_sin(s[i]); } break; // vnsin
//case 28: d[i] = 1.0f / powf(2.0, s[i]); break; // vrexp2
case 28: { d[i] = vfpu_rexp2(s[i]); } break; // vrexp2
default:
_dbg_assert_msg_( false, "Invalid VV2Op op type %d", optype);

View file

@ -802,6 +802,7 @@ float vfpu_dot(const float a[4], const float b[4]) {
// See https://github.com/hrydgard/ppsspp/issues/16946 for details.
// Lookup tables.
// Note: these tables are never unloaded; once loaded, they stay in memory until program termination.
static uint32_t (*vfpu_sin_lut8192)=nullptr;
static int8_t (*vfpu_sin_lut_delta)[2]=nullptr;
static int16_t (*vfpu_sin_lut_interval_delta)=nullptr;
@ -824,6 +825,30 @@ static uint16_t (*vfpu_asin_lut_indices)=nullptr;
static int8_t (*vfpu_rcp_lut)[2]=nullptr;
// Loads a single VFPU lookup table from the VFS the first time it is needed.
//
//   ptr           - in/out table pointer. If it is already non-null the table
//                   is considered loaded and nothing is done. It is assigned
//                   only after the file has been read AND its size verified,
//                   so it never points at a truncated or oversized table.
//   filename      - path of the table file, passed to g_VFS.ReadFile().
//   expected_size - exact size in bytes that the file must have.
//
// Returns true if the table is available (already loaded, or loaded just
// now); false if the file could not be read or its size is unexpected.
template<typename T>
static inline bool load_vfpu_table(T *&ptr, const char *filename, size_t expected_size) {
#if COMMON_BIG_ENDIAN
	// Tables are little-endian.
#error Byteswap for VFPU tables not implemented
#endif
	if (ptr)
		return true; // Already loaded.

	size_t size = 0u;
	INFO_LOG(CPU, "Loading '%s'...", filename);

	// Keep the buffer in the exact pointer type ReadFile() returned until it
	// has been validated: releasing it with delete[] through an unrelated
	// type (e.g. int8_t (*)[2]) would be undefined behaviour.
	auto *raw = g_VFS.ReadFile(filename, &size);
	if (!raw || size != expected_size) {
		ERROR_LOG(CPU, "Error loading '%s' (size=%u, expected: %u)", filename, (unsigned)size, (unsigned)expected_size);
		delete[] raw; // delete[] on a null pointer is a no-op.
		// ptr was null on entry (checked above) and is left null.
		return false;
	}

	// Publish the table only now that it is known to be good.
	ptr = reinterpret_cast<T *>(raw);
	INFO_LOG(CPU, "Successfully loaded '%s'", filename);
	return true;
}
// Convenience wrapper around load_vfpu_table(): `name` is used both as the
// table pointer variable and, stringized, as the file name, so
// LOAD_TABLE(foo, N) loads "vfpu/foo.dat" into `foo` and expects N bytes.
// Evaluates to the bool returned by load_vfpu_table().
#define LOAD_TABLE(name, expected_size)\
load_vfpu_table(name,"vfpu/" #name ".dat",expected_size)
// Note: PSP sin/cos output only has 22 significant
// binary digits.
static inline uint32_t vfpu_sin_quantum(uint32_t x) {
@ -877,6 +902,11 @@ static inline uint32_t vfpu_sin_fixed(uint32_t arg) {
}
float vfpu_sin(float x) {
static bool loaded =
LOAD_TABLE(vfpu_sin_lut8192, 4100)&&
LOAD_TABLE(vfpu_sin_lut_delta, 262144)&&
LOAD_TABLE(vfpu_sin_lut_interval_delta, 131074)&&
LOAD_TABLE(vfpu_sin_lut_exceptions, 86938);
uint32_t bits;
memcpy(&bits, &x, sizeof(x));
uint32_t sign = bits & 0x80000000u;
@ -909,6 +939,11 @@ float vfpu_sin(float x) {
}
float vfpu_cos(float x) {
static bool loaded =
LOAD_TABLE(vfpu_sin_lut8192, 4100)&&
LOAD_TABLE(vfpu_sin_lut_delta, 262144)&&
LOAD_TABLE(vfpu_sin_lut_interval_delta, 131074)&&
LOAD_TABLE(vfpu_sin_lut_exceptions, 86938);
uint32_t bits;
memcpy(&bits, &x, sizeof(x));
bits &= 0x7FFFFFFFu;
@ -1012,6 +1047,8 @@ static inline uint32_t vfpu_sqrt_fixed(uint32_t x) {
}
float vfpu_sqrt(float x) {
static bool loaded =
LOAD_TABLE(vfpu_sqrt_lut, 262144);
uint32_t bits;
memcpy(&bits, &x, sizeof(bits));
if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) {
@ -1096,6 +1133,8 @@ static inline uint32_t vfpu_rsqrt_fixed(uint32_t x) {
}
float vfpu_rsqrt(float x) {
static bool loaded =
LOAD_TABLE(vfpu_rsqrt_lut, 262144);
uint32_t bits;
memcpy(&bits, &x, sizeof(bits));
if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) {
@ -1153,6 +1192,10 @@ static uint32_t vfpu_asin_fixed(uint32_t x) {
}
float vfpu_asin(float x) {
static bool loaded =
LOAD_TABLE(vfpu_asin_lut65536, 1536)&&
LOAD_TABLE(vfpu_asin_lut_indices, 798916)&&
LOAD_TABLE(vfpu_asin_lut_deltas, 517448);
uint32_t bits;
memcpy(&bits, &x, sizeof(x));
uint32_t sign = bits & 0x80000000u;
@ -1190,6 +1233,9 @@ static inline uint32_t vfpu_exp2_fixed(uint32_t x) {
}
float vfpu_exp2(float x) {
static bool loaded =
LOAD_TABLE(vfpu_exp2_lut65536, 512)&&
LOAD_TABLE(vfpu_exp2_lut, 262144);
int32_t bits;
memcpy(&bits, &x, sizeof(bits));
if((bits & 0x7FFFFFFF) <= 0x007FFFFF) {
@ -1239,6 +1285,10 @@ static inline uint32_t vfpu_log2_approx(uint32_t x) {
// Matches PSP output on all known values.
float vfpu_log2(float x) {
static bool loaded =
LOAD_TABLE(vfpu_log2_lut65536, 516)&&
LOAD_TABLE(vfpu_log2_lut65536_quadratic, 512)&&
LOAD_TABLE(vfpu_log2_lut, 2097152);
uint32_t bits;
memcpy(&bits, &x, sizeof(bits));
if((bits & 0x7FFFFFFFu) <= 0x007FFFFFu) {
@ -1289,6 +1339,8 @@ static inline uint32_t vfpu_rcp_approx(uint32_t i) {
}
float vfpu_rcp(float x) {
static bool loaded =
LOAD_TABLE(vfpu_rcp_lut, 262144);
uint32_t bits;
memcpy(&bits, &x, sizeof(bits));
uint32_t s = bits & 0x80000000u;
@ -1318,35 +1370,22 @@ float vfpu_rcp(float x) {
//==============================================================================
void InitVFPU() {
#if COMMON_BIG_ENDIAN
// Tables are little-endian.
#error Byteswap for VFPU tables not implemented
#if 0
// Load all in advance.
LOAD_TABLE(vfpu_asin_lut65536 , 1536);
LOAD_TABLE(vfpu_asin_lut_deltas , 517448);
LOAD_TABLE(vfpu_asin_lut_indices , 798916);
LOAD_TABLE(vfpu_exp2_lut65536 , 512);
LOAD_TABLE(vfpu_exp2_lut , 262144);
LOAD_TABLE(vfpu_log2_lut65536 , 516);
LOAD_TABLE(vfpu_log2_lut65536_quadratic, 512);
LOAD_TABLE(vfpu_log2_lut , 2097152);
LOAD_TABLE(vfpu_rcp_lut , 262144);
LOAD_TABLE(vfpu_rsqrt_lut , 262144);
LOAD_TABLE(vfpu_sin_lut8192 , 4100);
LOAD_TABLE(vfpu_sin_lut_delta , 262144);
LOAD_TABLE(vfpu_sin_lut_exceptions , 86938);
LOAD_TABLE(vfpu_sin_lut_interval_delta , 131074);
LOAD_TABLE(vfpu_sqrt_lut , 262144);
#endif
size_t size=0;
#define LOAD(expected,name)\
if(!name) {\
const char *filename = "vfpu/" #name ".dat";\
INFO_LOG(CPU, "Loading '%s'...", filename);\
name=reinterpret_cast<decltype(name)>(g_VFS.ReadFile(filename, &size));\
if(size!=(expected))\
ERROR_LOG(CPU, "Error loading '%s' (size=%u, expected: %u)", filename, (unsigned)size, (unsigned)(expected));\
else\
INFO_LOG(CPU, "Successfully loaded '%s'", filename);\
}
LOAD( 1536, vfpu_asin_lut65536);
LOAD( 517448, vfpu_asin_lut_deltas);
LOAD( 798916, vfpu_asin_lut_indices);
LOAD( 512, vfpu_exp2_lut65536);
LOAD( 262144, vfpu_exp2_lut);
LOAD( 516, vfpu_log2_lut65536);
LOAD( 512, vfpu_log2_lut65536_quadratic);
LOAD( 2097152, vfpu_log2_lut);
LOAD( 262144, vfpu_rcp_lut);
LOAD( 262144, vfpu_rsqrt_lut);
LOAD( 4100, vfpu_sin_lut8192);
LOAD( 262144, vfpu_sin_lut_delta);
LOAD( 86938, vfpu_sin_lut_exceptions);
LOAD( 131074, vfpu_sin_lut_interval_delta);
LOAD( 262144, vfpu_sqrt_lut);
#undef LOAD
}

View file

@ -42,6 +42,7 @@ inline int Xpose(int v) {
// Final Fantasy III (#2921 )
// Hitman Reborn 2 (#12900)
// Cho Aniki Zero (#13705)
// Hajime no Ippo (#13671)
// Dissidia Duodecim Final Fantasy (#6710 )
//
// Messing around with the modulo functions? Try https://www.desmos.com/calculator.
@ -50,11 +51,6 @@ extern float vfpu_sin(float);
extern float vfpu_cos(float);
extern void vfpu_sincos(float, float&, float&);
/*
inline float vfpu_asin(float angle) {
return (float)(asinf(angle) / M_PI_2);
}
*/
extern float vfpu_asin(float);
inline float vfpu_clamp(float v, float min, float max) {