nulldc-360/nullDC/dc/sh4/sh4_fpu.cpp
2022-02-11 13:27:24 +01:00

677 lines
No EOL
11 KiB
C++

/*
sh4 base core
most of it is (very) old
could use many cleanups, lets hope someone does them
*/
#include "types.h"
#include "sh4_interpreter.h"
#include "sh4_registers.h"
#include "dc/mem/sh4_mem.h"
#include <math.h>
#include <float.h>
#include "sh4r_rename.h"
// Declares an opcode handler taking the raw 16-bit opcode in `op`.
#define sh4op(str) void __fastcall str (u32 op)
// Opcode field extractors. Every argument is parenthesized so that compound
// expressions (e.g. GetN(a | b)) are decoded as a whole.
#define GetN(str) (((str)>>8) & 0xf)       // bits 11..8
#define GetM(str) (((str)>>4) & 0xf)       // bits 7..4
#define GetImm4(str) (((str)>>0) & 0xf)    // bits 3..0
#define GetImm8(str) (((str)>>0) & 0xff)   // bits 7..0
#define GetImm12(str) (((str)>>0) & 0xfff) // bits 11..0
// Register-pair index taken from the upper three bits of the N / M field.
// These now decode their own argument; previously they ignored it and read
// a variable named `op` from the calling scope.
#define GetDN(opc) (((opc)&0x0F00)>>9)
#define GetDM(opc) (((opc)&0x00F0)>>5)
#define pi (3.14159265f)
// Logs an unimplemented FPU instruction; defined at the end of this file.
void iNimp(char*str);
// True when the exponent field (mask 0x7f800000) of the 32-bit pattern
// pointed to by `f` is zero. This also matches +/-0, so callers must check
// the mantissa separately.
#define IS_DENORMAL(f) (((*(f))&0x7f800000) == 0)
// Flushes a single-precision denormal to a zero of the same sign.
// Does nothing unless fpscr.DN is set.
//   value : the float to inspect; modified in place.
INLINE void Denorm32(float &value)
{
    if (!fpscr.DN)
        return;

    u32* bits = (u32*)&value;

    // Zero exponent with a non-zero magnitude means denormal: keep only the sign bit.
    // The extra +/- range checks that used to follow this test could never
    // fire (after the flush the pattern is 0 or 0x80000000, and any value
    // that skips the flush is either a zero or has a non-zero exponent),
    // so they have been removed.
    if (IS_DENORMAL(bits) && (*bits & 0x7fFFFFFF) != 0)
    {
        *bits &= 0x80000000;
    }
}
// Post-operation result fix-ups.
// CHECK_FPU_32 forwards to Denorm32(); CHECK_FPU_64 expands to nothing.
#define CHECK_FPU_32(v) Denorm32(v)
#define CHECK_FPU_64(v)
// Precision-control brackets placed around double-precision code paths.
// On XENON builds they expand to nothing; otherwise START64/STARTMODE64 call
// _controlfp with _PC_53 and END64/ENDMODE64 call it with _PC_24.
// Note that END64 always selects _PC_24 rather than restoring a saved value.
#ifdef XENON
#define START64()
#define END64()
#define STARTMODE64()
#define ENDMODE64()
#else
#define START64() _controlfp(_PC_53, MCW_PC)
#define END64() _controlfp(_PC_24, MCW_PC)
#define STARTMODE64() _controlfp(_PC_53, MCW_PC)
#define ENDMODE64() _controlfp(_PC_24, MCW_PC)
#endif
//all fpu emulation ops :)
// ZeZu[7/9/05] fpu needs some work ;)
// need dr and a good way to handle fp stuff as int (union) : You can use fr_hex/xf_hex
// also need to know about XD ie: sz=1 (DR) but unused N/M bit if 1 = XD (float[4])
//fpscr is now easy to handle
// Templates "could" be a very good way to handle DR/XD
// BTW: you might want to check these, no sleep + heat != good code ;p
// drkIIRaziel 11/7/05 [dd/mm] replaced fpu emulation with my fpu code :) (buggy most prob
// but at least kos demos should work now :) :) :) ) heh
//fadd <FREG_M>,<FREG_N>
// Floating-point add: register n += register m.
// With fpscr.PR clear this works on the single-precision fr[] array; with
// PR set it works on double registers accessed through GetDR/SetDR.
sh4op(i1111_nnnn_mmmm_0000)
{
    if (fpscr.PR != 0)
    {
        // Double registers are indexed by the upper three bits of each field.
        const u32 dst = (op >> 9) & 0x07;
        const u32 src = (op >> 5) & 0x07;

        START64();
        double lhs = GetDR(dst);
        double rhs = GetDR(src);
        double sum = lhs + rhs;
        CHECK_FPU_64(sum);
        SetDR(dst, sum);
        END64();
        return;
    }

    const u32 dst = GetN(op);
    const u32 src = GetM(op);
    fr[dst] = fr[dst] + fr[src];
    // NOTE(review): no CHECK_FPU_32 here, unlike fsub/fmul/fdiv; the call was
    // already commented out in the previous revision. Confirm whether that is intentional.
}
//fsub <FREG_M>,<FREG_N>
// Floating-point subtract: register n -= register m.
// Single-precision path (fpscr.PR == 0) flushes the result via CHECK_FPU_32;
// the double path goes through GetDR/SetDR inside a START64/END64 bracket.
sh4op(i1111_nnnn_mmmm_0001)
{
    if (fpscr.PR != 0)
    {
        const u32 dst = (op >> 9) & 0x07;
        const u32 src = (op >> 5) & 0x07;

        START64();
        double minuend = GetDR(dst);
        double subtrahend = GetDR(src);
        SetDR(dst, minuend - subtrahend);
        END64();
        return;
    }

    const u32 dst = GetN(op);
    const u32 src = GetM(op);
    fr[dst] = fr[dst] - fr[src];
    CHECK_FPU_32(fr[dst]);
}
//fmul <FREG_M>,<FREG_N>
// Floating-point multiply: register n *= register m.
// Single-precision path (fpscr.PR == 0) flushes the result via CHECK_FPU_32;
// the double path goes through GetDR/SetDR inside a START64/END64 bracket.
sh4op(i1111_nnnn_mmmm_0010)
{
    if (fpscr.PR != 0)
    {
        const u32 dst = (op >> 9) & 0x07;
        const u32 src = (op >> 5) & 0x07;

        START64();
        double lhs = GetDR(dst);
        double rhs = GetDR(src);
        SetDR(dst, lhs * rhs);
        END64();
        return;
    }

    const u32 dst = GetN(op);
    const u32 src = GetM(op);
    fr[dst] = fr[dst] * fr[src];
    CHECK_FPU_32(fr[dst]);
}
//fdiv <FREG_M>,<FREG_N>
// Floating-point divide: register n /= register m.
// Single-precision path (fpscr.PR == 0) flushes the result via CHECK_FPU_32;
// the double path goes through GetDR/SetDR inside a START64/END64 bracket.
sh4op(i1111_nnnn_mmmm_0011)
{
    if (fpscr.PR != 0)
    {
        const u32 dst = (op >> 9) & 0x07;
        const u32 src = (op >> 5) & 0x07;

        START64();
        double dividend = GetDR(dst);
        double divisor = GetDR(src);
        SetDR(dst, dividend / divisor);
        END64();
        return;
    }

    const u32 dst = GetN(op);
    const u32 src = GetM(op);
    fr[dst] = fr[dst] / fr[src];
    CHECK_FPU_32(fr[dst]);
}
//fcmp/eq <FREG_M>,<FREG_N>
// Floating-point equality compare: passes 1 to sr.SetT when register m
// equals register n, otherwise 0. fpscr.PR selects single or double operands.
sh4op(i1111_nnnn_mmmm_0100)
{
    if (fpscr.PR != 0)
    {
        const u32 lhs = (op >> 9) & 0x07;
        const u32 rhs = (op >> 5) & 0x07;

        START64();
        if (GetDR(rhs) == GetDR(lhs))
            sr.SetT(1);
        else
            sr.SetT(0);
        END64();
        return;
    }

    const u32 lhs = GetN(op);
    const u32 rhs = GetM(op);
    if (fr[rhs] == fr[lhs])
        sr.SetT(1);
    else
        sr.SetT(0);
}
//fcmp/gt <FREG_M>,<FREG_N>
// Floating-point greater-than compare: passes 1 to sr.SetT when register n
// is greater than register m, otherwise 0. fpscr.PR selects single or double operands.
sh4op(i1111_nnnn_mmmm_0101)
{
    if (fpscr.PR != 0)
    {
        const u32 lhs = (op >> 9) & 0x07;
        const u32 rhs = (op >> 5) & 0x07;

        START64();
        sr.SetT((GetDR(lhs) > GetDR(rhs)) ? 1 : 0);
        END64();
        return;
    }

    const u32 lhs = GetN(op);
    const u32 rhs = GetM(op);
    sr.SetT((fr[lhs] > fr[rhs]) ? 1 : 0);
}
//fmov <FREG_M>,<FREG_N>
// Register-to-register floating-point move.
// With fpscr.SZ clear a single fr[] entry is copied. With SZ set an even/odd
// pair of 32-bit words is copied; opcode bit 8 picks the destination array and
// opcode bit 4 the source array (clear = fr_hex, set = xf_hex).
sh4op(i1111_nnnn_mmmm_1100)
{
    if (fpscr.SZ == 0)
    {
        fr[GetN(op)] = fr[GetM(op)];
        return;
    }

    // Pair base indices: low bit of each field masked off.
    const u32 dst = (op >> 8) & 0xE;
    const u32 src = (op >> 4) & 0xE;
    const u32 dst_in_xf = op & 0x0100;
    const u32 src_in_xf = op & 0x0010;

    if (dst_in_xf)
    {
        if (src_in_xf)
        {
            // xf pair <- xf pair
            xf_hex[dst] = xf_hex[src];
            xf_hex[dst + 1] = xf_hex[src + 1];
        }
        else
        {
            // xf pair <- fr pair
            xf_hex[dst] = fr_hex[src];
            xf_hex[dst + 1] = fr_hex[src + 1];
        }
    }
    else
    {
        if (src_in_xf)
        {
            // fr pair <- xf pair
            fr_hex[dst] = xf_hex[src];
            fr_hex[dst + 1] = xf_hex[src + 1];
        }
        else
        {
            // fr pair <- fr pair
            fr_hex[dst] = fr_hex[src];
            fr_hex[dst + 1] = fr_hex[src + 1];
        }
    }
}
//fabs <FREG_N>
// Absolute value: clears bit 31 of the selected 32-bit register word.
// When fpscr.PR is set the even word of the register pair is the one modified.
sh4op(i1111_nnnn_0101_1101)
{
    const int reg = GetN(op);
    const int word = (fpscr.PR == 0) ? reg : (reg & 0xE);
    fr_hex[word] &= 0x7FFFFFFF;
}
//FSCA FPUL, DRn//F0FD//1111_nnn0_1111_1101
// Sine/cosine lookup: the low 16 bits of fpul index sin_table; the sine goes
// to the even register of the pair and the cosine to the odd one.
// Only handled when fpscr.PR is clear; otherwise reported through iNimp.
sh4op(i1111_nnn0_1111_1101)
{
    if (fpscr.PR != 0)
    {
        iNimp("FSCA : Double precision mode");
        return;
    }

    const int base = GetN(op) & 0xE;
    const u32 angle = (u16)fpul;

    fr[base] = sin_table[angle];
    // cosine(x) = sine(pi/2 + x): read a quarter turn (0x4000 entries) ahead.
    // Per the original note, sin_table carries 0x4000 extra entries so no wraparound is needed.
    fr[base + 1] = sin_table[0x4000 + angle];

    CHECK_FPU_32(fr[base]);
    CHECK_FPU_32(fr[base + 1]);
}
//FSRRA //1111_nnnn_0111_1101
// Reciprocal square root: fr[n] = 1 / sqrt(fr[n]), evaluated in double and
// narrowed back to float. Double-precision mode is reported through iNimp.
sh4op(i1111_nnnn_0111_1101)
{
    const u32 reg = GetN(op);

    if (fpscr.PR != 0)
    {
        iNimp("FSRRA : Double precision mode");
        return;
    }

    const double root = sqrt((double)fr[reg]);
    fr[reg] = (float)(1 / root);
    CHECK_FPU_32(fr[reg]);
}
//fcnvds <DR_N>,FPUL
// Double-to-single conversion: narrows double register n to float and stores
// the float's bit pattern in fpul. Only valid with fpscr.PR == 1; any other
// mode is reported through iNimp.
sh4op(i1111_nnnn_1011_1101)
{
    if (fpscr.PR != 1)
    {
        iNimp("fcnvds <DR_N>,FPUL,m=0");
        return;
    }

    START64();
    const u32 src = (op >> 9) & 0x07;
    // fpul is written through a float view so it receives the raw float bits.
    float* fpul_as_float = (float*)&fpul;
    *fpul_as_float = (float)GetDR(src);
    END64();
}
//fcnvsd FPUL,<DR_N>
// Single-to-double conversion: reinterprets fpul as a float, widens it to
// double and stores it in double register n. Only valid with fpscr.PR == 1;
// any other mode is reported through iNimp.
sh4op(i1111_nnnn_1010_1101)
{
    if (fpscr.PR != 1)
    {
        iNimp("fcnvsd FPUL,<DR_N>,m=0");
        return;
    }

    START64();
    const u32 dst = (op >> 9) & 0x07;
    const float* fpul_as_float = (float*)&fpul;
    SetDR(dst, (double)*fpul_as_float);
    END64();
}
//fipr <FV_M>,<FV_N>
// Four-element inner product: multiplies the 4-float vector starting at
// fr[n] with the one starting at fr[m] and stores the sum in fr[n+3].
// The upper two bits of the N field select n, the lower two select m.
// Double-precision mode is reported through iNimp.
sh4op(i1111_nnmm_1110_1101)
{
    const int vec_n = GetN(op) & 0xC;
    const int vec_m = (GetN(op) & 0x3) << 2;

    if (fpscr.PR != 0)
    {
        iNimp("FIPR Precision=1");
        return;
    }

    // Accumulate strictly left to right, starting from the first product.
    float dot = fr[vec_n] * fr[vec_m];
    for (int lane = 1; lane < 4; lane++)
        dot += fr[vec_n + lane] * fr[vec_m + lane];

    CHECK_FPU_32(dot);
    fr[vec_n + 3] = dot;

    // NOTE: an earlier, disabled variant of this handler masked the low bits
    // of each product (the original comment says the hardware cuts the lower
    // 18 bits of the multiplier array output before carry-propagate addition).
    // That refinement is not performed here.
}
//fldi0 <FREG_N>
// Loads the constant 0.0f into fr[n].
// With fpscr.PR set the case is reported through iNimp, but the store is
// still performed afterwards.
sh4op(i1111_nnnn_1000_1101)
{
    if (fpscr.PR != 0)
    {
        iNimp("fldi0 <Dreg_N>");
    }

    fr[GetN(op)] = 0.0f;
}
//fldi1 <FREG_N>
// Loads the constant 1.0f into fr[n].
// With fpscr.PR set the case is reported through iNimp, but the store is
// still performed afterwards.
sh4op(i1111_nnnn_1001_1101)
{
    if (fpscr.PR != 0)
    {
        iNimp("fldi1 <Dreg_N>");
    }

    fr[GetN(op)] = 1.0f;
}
//flds <FREG_N>,FPUL
// Copies the raw 32-bit contents of register n (fr_hex view) into fpul.
// No conversion takes place and fpscr.PR is not consulted.
sh4op(i1111_nnnn_0001_1101)
{
    const u32 src = GetN(op);
    const u32 raw = fr_hex[src];
    fpul = raw;
}
//fsts FPUL,<FREG_N>
// Copies fpul into the raw 32-bit contents of register n (fr_hex view).
// No conversion takes place and fpscr.PR is not consulted.
sh4op(i1111_nnnn_0000_1101)
{
    const u32 dst = GetN(op);
    const u32 raw = fpul;
    fr_hex[dst] = raw;
}
//float FPUL,<FREG_N>
// Integer-to-float conversion: treats fpul as a signed 32-bit integer and
// stores it as float in fr[n] (fpscr.PR == 0) or as double in DR n (PR set).
sh4op(i1111_nnnn_0010_1101)
{
    const int value = (int)fpul;

    if (fpscr.PR != 0)
    {
        START64();
        const u32 dst = (op >> 9) & 0x07;
        SetDR(dst, (double)value);
        END64();
        return;
    }

    const u32 dst = GetN(op);
    fr[dst] = (float)value;
}
//fneg <FREG_N>
// Negate: flips bit 31 of the selected 32-bit register word.
// When fpscr.PR is set the even word of the register pair is the one modified.
sh4op(i1111_nnnn_0100_1101)
{
    const u32 reg = GetN(op);
    const u32 word = (fpscr.PR == 0) ? reg : (reg & 0xE);
    fr_hex[word] ^= 0x80000000;
}
//frchg
// Toggles fpscr.FR and then calls UpdateFPSCR() so the change takes effect.
sh4op(i1111_1011_1111_1101)
{
    const u32 flipped = 1 - fpscr.FR;
    fpscr.FR = flipped;
    UpdateFPSCR();
}
//fschg
// Toggles fpscr.SZ, the flag fmov checks to choose between single-word and
// pair moves. Unlike frchg, UpdateFPSCR() is not called here.
sh4op(i1111_0011_1111_1101)
{
    const u32 flipped = 1 - fpscr.SZ;
    fpscr.SZ = flipped;
}
//fsqrt <FREG_N>
// Square root of register n, in place.
// Single-precision path flushes the result via CHECK_FPU_32; the double path
// goes through GetDR/SetDR inside a START64/END64 bracket.
sh4op(i1111_nnnn_0110_1101)
{
    if (fpscr.PR != 0)
    {
        // Double register index: upper three bits of the N field.
        const u32 reg = (op >> 9) & 0x07;
        START64();
        SetDR(reg, sqrt(GetDR(reg)));
        END64();
        return;
    }

    const u32 reg = GetN(op);
    fr[reg] = sqrt(fr[reg]);
    CHECK_FPU_32(fr[reg]);
}
//ftrc <FREG_N>, FPUL
// Float-to-integer conversion with truncation toward zero; the signed 32-bit
// result is written to fpul. fpscr.PR selects fr[n] (float) or DR n (double).
//
// Out-of-range inputs are handled explicitly instead of relying on the host's
// float->int cast (undefined for out-of-range values) or on how `min` treats NaN:
//   NaN               -> 0x80000000
//   value >=  2^31    -> 0x7FFFFFFF
//   value <= -2^31    -> 0x80000000
// NOTE(review): these saturation values follow the FTRC description in the
// SH-4 software manual - confirm against hardware tests if available.
sh4op(i1111_nnnn_0011_1101)
{
    if (fpscr.PR == 0)
    {
        u32 n = GetN(op);
        float src = fr[n];

        if (src != src)                     // NaN compares unequal to itself
            fpul = 0x80000000;
        else if (src >= 2147483648.0f)      // positive overflow
            fpul = 0x7FFFFFFF;
        else if (src <= -2147483648.0f)     // negative overflow (or exactly INT_MIN)
            fpul = 0x80000000;
        else
            fpul = (u32)(s32)src;
    }
    else
    {
        START64();
        u32 n = (op >> 9) & 0x07;
        double src = GetDR(n);

        if (src != src)                     // NaN
            fpul = 0x80000000;
        else if (src >= 2147483648.0)       // positive overflow
            fpul = 0x7FFFFFFF;
        else if (src <= -2147483648.0)      // negative overflow (or exactly INT_MIN)
            fpul = 0x80000000;
        else
            fpul = (u32)(s32)src;
        END64();
    }
}
//fmac <FREG_0>,<FREG_M>,<FREG_N>
// Multiply-accumulate: fr[n] = fr[n] + fr[0] * fr[m], evaluated in f64 and
// narrowed back to f32, then flushed via CHECK_FPU_32.
// Double-precision mode is reported through iNimp.
sh4op(i1111_nnnn_mmmm_1110)
{
    if (fpscr.PR != 0)
    {
        iNimp("fmac <DREG_0>,<DREG_M>,<DREG_N>");
        return;
    }

    const u32 acc = GetN(op);
    const u32 mul = GetM(op);

    fr[acc] = (f32)((f64)fr[acc] + (f64)fr[0] * (f64)fr[mul]);
    CHECK_FPU_32(fr[acc]);
}
// Dot product of one xf[] matrix row with a 4-float vector in fr[].
//   val : row number; the row's elements are xf[val], xf[val+4], xf[val+8], xf[val+12]
//   n   : index of the first vector element in fr[]
// Returns the sum, accumulated in float starting from 0.0f in element order.
INLINE float ftrv_dot_prod_1 (u8 val,u8 n)
{
    float acc = 0.0f;
    u32 col = 0;
    while (col < 4)
    {
        acc += xf[val + col * 4] * fr[n + col];
        ++col;
    }
    return acc;
}
//ftrv xmtrx,<FV_N>
// Matrix-vector transform: multiplies the 4x4 matrix held in xf[] with the
// 4-float vector starting at fr[n] and writes the result back over it.
//
//   XF[0] XF[4] XF[8]  XF[12]     FR[n]        FR[n]
//   XF[1] XF[5] XF[9]  XF[13]  *  FR[n+1]  ->  FR[n+1]
//   XF[2] XF[6] XF[10] XF[14]     FR[n+2]      FR[n+2]
//   XF[3] XF[7] XF[11] XF[15]     FR[n+3]      FR[n+3]
//
// Double-precision mode is reported through iNimp.
sh4op(i1111_nn01_1111_1101)
{
    const u32 vec = GetN(op) & 0xC;

    if (fpscr.PR != 0)
    {
        iNimp("FTRV in dp mode");
        return;
    }

    // Every row is computed before any write-back because each result depends
    // on all four input elements.
    float out[4];
    u8 row = 0;
    while (row < 4)
    {
        out[row] = ftrv_dot_prod_1(row, (u8)vec);
        row++;
    }

    u8 slot = 0;
    while (slot < 4)
    {
        fr[vec + slot] = (float)out[slot];
        slot++;
    }
}
// Logs that an FPU instruction (or one of its modes) is not implemented.
//   str : short description of the instruction; logged as-is.
// Execution continues afterwards - the interpreter is not stopped.
void iNimp(char*str)
{
    // Pass the description as a "%s" argument, never as the format string
    // itself, so a '%' in the text cannot be read as a conversion specifier.
    // NOTE(review): assumes dlog takes printf-style arguments - confirm.
    dlog("Unimplemented sh4 fpu instruction: %s\n", str);
    //Sh4_int_Stop();
}