/* Pcsx2 - Pc Ps2 Emulator * Copyright (C) 2002-2005 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ // recompiler reworked to add dynamic linking Jan06 // and added reg caching, const propagation, block analysis Jun06 // zerofrog(@gmail.com) // stop compiling if NORECBUILD build (only for Visual Studio) #if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) #ifdef _WIN32 #pragma warning(disable:4244) #pragma warning(disable:4761) #endif extern "C" { #include #include #include #include #include #include "PS2Etypes.h" #if defined(_WIN32) #include #else #include #include #endif #include "System.h" #include "zlib.h" #include "Memory.h" #include "Misc.h" #include "Vif.h" #include "VU.h" #include "R3000A.h" #include "PsxMem.h" #include "ix86/ix86.h" #include "iCore.h" #include "iR3000A.h" #include "PsxCounters.h" extern u32 psxNextCounter, psxNextsCounter; u32 g_psxMaxRecMem = 0; extern char *disRNameGPR[]; extern char* disR3000Fasm(u32 code, u32 pc); void psxRecRecompile(u32 startpc); uptr *psxRecLUT; } #define PSX_NUMBLOCKS (1<<12) #define MAPBASE 0x48000000 #define RECMEM_SIZE (8*1024*1024) #define PSX_MEMMASK 0x5fffffff // mask when comparing two pcs // R3000A statics int psxreclog = 0; static u32 s_BranchCount = 0; static char *recMem; // the recompiled blocks will be here static BASEBLOCK *recRAM; // and the ptr to the blocks here static BASEBLOCK *recROM; // and here static BASEBLOCK *recROM1; // also here static BASEBLOCKEX *recBlocks = NULL; static char *recPtr; u32 psxpc; // recompiler psxpc int psxbranch; // set for branch static EEINST* s_pInstCache = NULL; static u32 s_nInstCacheSize = 0; static BASEBLOCK* s_pCurBlock = NULL; static BASEBLOCKEX* s_pCurBlockEx = NULL; static BASEBLOCK* s_pDispatchBlock = NULL; static u32 s_nEndBlock = 0; // what psxpc the current block ends static u32 s_nNextBlock = 0; // next free block in recBlocks static u32 s_ConstGPRreg; static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg = 0; static EEINST* s_psaveInstInfo = NULL; u32 s_psxBlockCycles = 0; // cycles of current block recompiling static u32 s_savenBlockCycles = 0; static void iPsxBranchTest(u32 newpc, u32 cpuBranch); void psxRecompileNextInstruction(int delayslot); extern void (*rpsxBSC[64])(); extern void (*rpsxBSC_co[64])(); void rpsxpropBSC(EEINST* prev, EEINST* pinst); #ifdef _DEBUG extern "C" { u32 psxdump = 0; } #else #define psxdump 0 #endif #define PSX_GETBLOCK(x) PC_GETBLOCK_(x, psxRecLUT) #define PSXREC_CLEARM(mem) { \ if ((mem) < g_psxMaxRecMem && psxRecLUT[(mem) >> 16]) { \ BASEBLOCK* p = PSX_GETBLOCK(mem); \ if( *(u32*)p ) psxRecClearMem(p); \ } \ } \ BASEBLOCKEX* PSX_GETBLOCKEX(BASEBLOCK* p) { // BASEBLOCKEX* pex = *(BASEBLOCKEX**)(p+1); // if( pex >= recBlocks && pex < recBlocks+PSX_NUMBLOCKS ) // return pex; // otherwise, use the sorted list return GetBaseBlockEx(p->startpc, 1); } //////////////////////////////////////////////////// static void iDumpBlock( int startpc, char * ptr ) { FILE *f; char filename[ 256 ]; #ifdef __LINUX__ char command[256]; #endif u32 i, j; EEINST* pcur; u8 used[34]; int numused, count; SysPrintf( "dump1 %x:%x, %x\n", startpc, psxpc, psxRegs.cycle ); #ifdef _WIN32 CreateDirectory("dumps", NULL); sprintf( filename, "dumps\\psxdump%.8X.txt", startpc); #else mkdir("dumps", 0755); sprintf( filename, "dumps/psxdump%.8X.txt", startpc); #endif fflush( stdout ); f = fopen( filename, "w" ); assert( f != NULL ); for ( i = startpc; i < s_nEndBlock; i += 4 ) { fprintf( f, "%s\n", disR3000Fasm( *(u32*)PSXM( i ), i ) ); } // write the instruction info fprintf(f, "\n\nlive0 - %x, lastuse - %x used - %x\n", EEINST_LIVE0, EEINST_LASTUSE, EEINST_USED); memset(used, 0, sizeof(used)); numused = 0; for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { if( s_pInstCache->regs[i] & EEINST_USED ) { used[i] = 1; numused++; } } fprintf(f, " "); for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { if( used[i] ) fprintf(f, "%2d ", i); } fprintf(f, "\n"); fprintf(f, " "); for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { if( used[i] ) fprintf(f, "%s ", disRNameGPR[i]); } fprintf(f, "\n"); pcur = s_pInstCache+1; for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { fprintf(f, "%2d: %2.2x ", i+1, pcur->info); count = 1; for(j = 0; j < ARRAYSIZE(s_pInstCache->regs); j++) { if( used[j] ) { fprintf(f, "%2.2x%s", pcur->regs[j], ((count%8)&&count tempdump", filename ); #else sprintf( command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 | cat %s - > tempdump", filename ); #endif system( command ); sprintf(command, "mv tempdump %s", filename); system(command); f = fopen( filename, "a+" ); #endif } u8 _psxLoadWritesRs(u32 tempcode) { switch(tempcode>>26) { case 32: case 33: case 34: case 35: case 36: case 37: case 38: return ((tempcode>>21)&0x1f)==((tempcode>>16)&0x1f); // rs==rt } return 0; } u8 _psxIsLoadStore(u32 tempcode) { switch(tempcode>>26) { case 32: case 33: case 34: case 35: case 36: case 37: case 38: // 4 byte stores case 40: case 41: case 42: case 43: case 46: return 1; } return 0; } void _psxFlushAllUnused() { int i; for(i = 0; i < 34; ++i) { if( psxpc < s_nEndBlock ) { if( (g_pCurInstInfo[1].regs[i]&EEINST_USED) ) continue; } else if( (g_pCurInstInfo[0].regs[i]&EEINST_USED) ) continue; if( i < 32 && PSX_IS_CONST1(i) ) _psxFlushConstReg(i); else { _deleteX86reg(X86TYPE_PSX, i, 1); } } } int _psxFlushUnusedConstReg() { int i; for(i = 1; i < 32; ++i) { if( (g_psxHasConstReg & (1< 0 ); // make sure right GPR was saved assert( g_psxHasConstReg == s_saveHasConstReg || (g_psxHasConstReg ^ s_saveHasConstReg) == (1<>26) == 9 ) { //ADDIU, call bios MOV32ItoM( (uptr)&psxRegs.code, psxRegs.code ); MOV32ItoM( (uptr)&psxRegs.pc, psxpc ); _psxFlushCall(FLUSH_NODESTROY); CALLFunc((uptr)zeroEx); } #endif return; } // for now, don't support xmm PSX_CHECK_SAVE_REG(_Rt_); _deleteX86reg(X86TYPE_PSX, _Rs_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 0); if( PSX_IS_CONST1(_Rs_) ) { PSX_SET_CONST(_Rt_); constcode(); return; } noconstcode(0); PSX_DEL_CONST(_Rt_); } // rd = rt op sa void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode) { if ( ! _Rd_ ) return; // for now, don't support xmm PSX_CHECK_SAVE_REG(_Rd_); _deleteX86reg(X86TYPE_PSX, _Rt_, 1); _deleteX86reg(X86TYPE_PSX, _Rd_, 0); if( PSX_IS_CONST1(_Rt_) ) { PSX_SET_CONST(_Rd_); constcode(); return; } noconstcode(0); PSX_DEL_CONST(_Rd_); } // rd = rt MULT rs (SPECIAL) void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int LOHI) { _deleteX86reg(X86TYPE_PSX, _Rs_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 1); if( LOHI ) { _deleteX86reg(X86TYPE_PSX, PSX_HI, 1); _deleteX86reg(X86TYPE_PSX, PSX_LO, 1); } if( PSX_IS_CONST2(_Rs_, _Rt_) ) { constcode(); return; } if( PSX_IS_CONST1(_Rs_) ) { constscode(0); return; } if( PSX_IS_CONST1(_Rt_) ) { consttcode(0); return; } noconstcode(0); } static int recInit() { int i; uptr startaddr; // can't have upper 4 bits nonzero! startaddr = 0x0f000000; while(!(startaddr & 0xf0000000)) { recMem = (char*)SysMmap(startaddr, RECMEM_SIZE); if( (uptr)recMem & 0xf0000000 ) { SysMunmap((uptr)recMem, RECMEM_SIZE); recMem = NULL; startaddr += 0x00100000; continue; } else break; } if( recMem == NULL ) { SysPrintf("R3000A bad rec memory allocation\n"); return 1; } psxRecLUT = (uptr*) malloc(0x010000 * sizeof(uptr)); memset(psxRecLUT, 0, 0x010000 * sizeof(uptr)); recRAM = (BASEBLOCK*) _aligned_malloc(sizeof(BASEBLOCK)/4*0x200000, 16); recROM = (BASEBLOCK*) _aligned_malloc(sizeof(BASEBLOCK)/4*0x400000, 16); recROM1= (BASEBLOCK*) _aligned_malloc(sizeof(BASEBLOCK)/4*0x040000, 16); recBlocks = (BASEBLOCKEX*) _aligned_malloc( sizeof(BASEBLOCKEX)*PSX_NUMBLOCKS, 16); if (recRAM == NULL || recROM == NULL || recROM1 == NULL || recMem == NULL || psxRecLUT == NULL) { SysMessage("Error allocating memory"); return -1; } s_nInstCacheSize = 128; s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize ); for (i=0; i<0x80; i++) psxRecLUT[i + 0x0000] = (uptr)&recRAM[(i & 0x1f) << 14]; for (i=0; i<0x80; i++) psxRecLUT[i + 0x8000] = (uptr)&recRAM[(i & 0x1f) << 14]; for (i=0; i<0x80; i++) psxRecLUT[i + 0xa000] = (uptr)&recRAM[(i & 0x1f) << 14]; for (i=0; i<0x40; i++) psxRecLUT[i + 0x1fc0] = (uptr)&recROM[i << 14]; for (i=0; i<0x40; i++) psxRecLUT[i + 0x9fc0] = (uptr)&recROM[i << 14]; for (i=0; i<0x40; i++) psxRecLUT[i + 0xbfc0] = (uptr)&recROM[i << 14]; for (i=0; i<0x40; i++) psxRecLUT[i + 0x1e00] = (uptr)&recROM1[i << 14]; for (i=0; i<0x40; i++) psxRecLUT[i + 0x9e00] = (uptr)&recROM1[i << 14]; for (i=0; i<0x40; i++) psxRecLUT[i + 0xbe00] = (uptr)&recROM1[i << 14]; memset(recMem, 0xcd, RECMEM_SIZE); return 0; } static void recReset() { memset(recRAM, 0, sizeof(BASEBLOCK)/4*0x200000); memset(recROM, 0, sizeof(BASEBLOCK)/4*0x400000); memset(recROM1,0, sizeof(BASEBLOCK)/4*0x040000); memset( recBlocks, 0, sizeof(BASEBLOCKEX)*PSX_NUMBLOCKS ); if( s_pInstCache ) memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize ); ResetBaseBlockEx(1); g_psxMaxRecMem = 0; recPtr = recMem; psxbranch = 0; } static void recShutdown() { if (recMem == NULL) return; free(psxRecLUT); SysMunmap((uptr)recMem, RECMEM_SIZE); _aligned_free(recRAM); _aligned_free(recROM); _aligned_free(recROM1); _aligned_free( recBlocks ); recBlocks = NULL; free( s_pInstCache ); s_pInstCache = NULL; s_nInstCacheSize = 0; x86Shutdown(); } #pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code #if !defined(__x86_64__) static u32 s_uSaveESP = 0; static void R3000AExecute() { #ifdef _DEBUG u8* fnptr; u32 oldesi; /*#else R3000AFNPTR pfn;*/ #endif BASEBLOCK* pblock; while (EEsCycle > 0) { pblock = PSX_GETBLOCK(psxRegs.pc); if ( !pblock->pFnptr || (pblock->startpc&PSX_MEMMASK) != (psxRegs.pc&PSX_MEMMASK) ) { psxRecRecompile(psxRegs.pc); } assert( pblock->pFnptr != 0 ); #ifdef _DEBUG fnptr = (u8*)pblock->pFnptr; #ifdef _MSC_VER __asm { // save data mov oldesi, esi; mov s_uSaveESP, esp; sub s_uSaveESP, 8; push ebp; call fnptr; // jump into function // restore data pop ebp; mov esi, oldesi; } #else // linux __asm__("movl %%esi, %0\n" "movl %%esp, %1\n" "sub $8, %%esp\n" "push %%ebp\n" "call *%2\n" "pop %%ebp\n" "movl %0, %%esi\n" : "=m"(oldesi), "=m"(s_uSaveESP) : "c"(fnptr) : ); #endif // _MSC_VER #else ((R3000AFNPTR)pblock->pFnptr)(); #endif } } #else extern "C" void R3000AExecute(); #endif extern u32 g_psxNextBranchCycle; u32 g_psxlastpc = 0; #if defined(_MSC_VER) && !defined(__x86_64__) static u32 g_temp; // jumped to when invalid psxpc address __declspec(naked,noreturn) void psxDispatcher() { // EDX contains the current psxpc to jump to, stack contains the jump addr to modify __asm push edx // calc PSX_GETBLOCK s_pDispatchBlock = PSX_GETBLOCK(psxRegs.pc); __asm { mov eax, s_pDispatchBlock // check if startpc&PSX_MEMMASK == psxRegs.pc&PSX_MEMMASK mov ecx, psxRegs.pc mov edx, [eax+BLOCKTYPE_STARTPC]; and ecx, PSX_MEMMASK // remove higher bits and edx, PSX_MEMMASK cmp ecx, edx je CheckPtr // recompile push psxRegs.pc // psxpc call psxRecRecompile add esp, 4 // pop old param mov eax, s_pDispatchBlock CheckPtr: mov eax, dword ptr [eax] } #ifdef _DEBUG __asm mov g_temp, eax assert( g_temp ); #endif // __asm { // test eax, 0x40000000 // BLOCKTYPE_NEEDCLEAR // jz Done // // move new psxpc // and eax, 0x0fffffff // mov ecx, psxRegs.pc // mov dword ptr [eax+1], ecx // } __asm { and eax, 0x0fffffff mov edx, eax pop ecx // x86Ptr to mod sub edx, ecx sub edx, 4 mov dword ptr [ecx], edx jmp eax } } __declspec(naked,noreturn) void psxDispatcherClear() { // EDX contains the current psxpc __asm mov psxRegs.pc, edx __asm push edx // calc PSX_GETBLOCK s_pDispatchBlock = PSX_GETBLOCK(psxRegs.pc); if( (s_pDispatchBlock->startpc&PSX_MEMMASK) == (psxRegs.pc&PSX_MEMMASK) ) { assert( s_pDispatchBlock->pFnptr != 0 ); // already modded the code, jump to the new place __asm { pop edx add esp, 4 // ignore stack mov eax, s_pDispatchBlock mov eax, dword ptr [eax] and eax, 0x0fffffff jmp eax } } __asm { call psxRecRecompile add esp, 4 // pop old param mov eax, s_pDispatchBlock mov eax, dword ptr [eax] pop ecx // old fnptr and eax, 0x0fffffff mov byte ptr [ecx], 0xe9 // jmp32 mov edx, eax sub edx, ecx sub edx, 5 mov dword ptr [ecx+1], edx jmp eax } } // called when jumping to variable psxpc address __declspec(naked,noreturn) void psxDispatcherReg() { __asm { //s_pDispatchBlock = PSX_GETBLOCK(psxRegs.pc); mov edx, psxRegs.pc mov ecx, edx } __asm { shr edx, 14 and edx, 0xfffffffc add edx, psxRecLUT mov edx, dword ptr [edx] mov eax, ecx and eax, 0xfffc // edx += 2*eax shl eax, 1 add edx, eax // check if startpc == psxRegs.pc mov eax, ecx //and eax, 0x5fffffff // remove higher bits cmp eax, dword ptr [edx+BLOCKTYPE_STARTPC] jne recomp mov eax, dword ptr [edx] } #ifdef _DEBUG __asm mov g_temp, eax assert( g_temp ); #endif __asm { and eax, 0x0fffffff jmp eax // fnptr recomp: sub esp, 8 mov dword ptr [esp+4], edx mov dword ptr [esp], ecx call psxRecRecompile mov edx, dword ptr [esp+4] add esp, 8 mov eax, dword ptr [edx] and eax, 0x0fffffff jmp eax // fnptr } } #else // _MSC_VER #ifdef __cplusplus extern "C" { #endif void psxDispatcher(); void psxDispatcherClear(); void psxDispatcherReg(); #ifdef __cplusplus } #endif #endif // _MSC_VER static void recClear(u32 Addr, u32 Size) { u32 i; for(i = 0; i < Size; ++i, Addr+=4) { PSXREC_CLEARM(Addr); } } #ifdef __x86_64__ #define EE_MIN_BLOCK_BYTES 16 #else #define EE_MIN_BLOCK_BYTES 15 #endif void rpsxMemConstClear(u32 mem) { // NOTE! This assumes recLUT never changes its mapping if( !psxRecLUT[mem>>16] ) return; CMP32ItoM((uptr)PSX_GETBLOCK(mem), 0); j8Ptr[6] = JE8(0); _callFunctionArg1((uptr)psxRecClearMem, MEM_CONSTTAG, (uptr)PSX_GETBLOCK(mem)); x86SetJ8(j8Ptr[6]); } void psxRecClearMem(BASEBLOCK* p) { BASEBLOCKEX* pexblock; BASEBLOCK* pstart; int lastdelay; assert( p != NULL ); if( p->uType & BLOCKTYPE_DELAYSLOT ) { psxRecClearMem(p-1); if( p->pFnptr == 0 ) return; } assert( p->pFnptr != 0 ); assert( p->startpc ); x86Ptr = (s8*)p->pFnptr; // there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which MOV32ItoR(EDX, p->startpc); assert( (uptr)x86Ptr <= 0xffffffff ); #ifdef __x86_64__ MOV32ItoR(R15, (uptr)x86Ptr); // will be replaced by JMP32 #else PUSH32I((uptr)x86Ptr); #endif JMP32((uptr)psxDispatcherClear - ( (uptr)x86Ptr + 5 )); assert( x86Ptr == (s8*)p->pFnptr + EE_MIN_BLOCK_BYTES ); pstart = PSX_GETBLOCK(p->startpc); pexblock = PSX_GETBLOCKEX(pstart); assert( pexblock->startpc == pstart->startpc ); // if( pexblock->pOldFnptr ) { // // have to mod oldfnptr too // x86Ptr = pexblock->pOldFnptr; // // MOV32ItoR(EDX, p->startpc); // JMP32((uptr)psxDispatcherClear - ( (uptr)x86Ptr + 5 )); // } // else // pexblock->pOldFnptr = (u8*)p->pFnptr; // don't delete if last is delay lastdelay = pexblock->size; if( pstart[pexblock->size-1].uType & BLOCKTYPE_DELAYSLOT ) { assert( pstart[pexblock->size-1].pFnptr != pstart->pFnptr ); if( pstart[pexblock->size-1].pFnptr != 0 ) { pstart[pexblock->size-1].uType = 0; --lastdelay; } } memset(pstart, 0, lastdelay*sizeof(BASEBLOCK)); RemoveBaseBlockEx(pexblock, 1); pexblock->size = 0; pexblock->startpc = 0; } void psxSetBranchReg(u32 reg) { psxbranch = 1; if( reg != 0xffffffff ) { _allocX86reg(ESI, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _psxMoveGPRtoR(ESI, reg); psxRecompileNextInstruction(1); if( x86regs[ESI].inuse ) { assert( x86regs[ESI].type == X86TYPE_PCWRITEBACK ); MOV32RtoM((uptr)&psxRegs.pc, ESI); x86regs[ESI].inuse = 0; } else { MOV32MtoR(EAX, (uptr)&g_recWriteback); MOV32RtoM((uptr)&psxRegs.pc, EAX); } } _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 5 )); } void psxSetBranchImm( u32 imm ) { u32* ptr; psxbranch = 1; assert( imm ); // end the current block MOV32ItoM( (uptr)&psxRegs.pc, imm ); _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(imm, imm <= psxpc); MOV32ItoR(EDX, 0); ptr = (u32*)(x86Ptr-4); *ptr = (uptr)JMP32((uptr)psxDispatcher - ( (uptr)x86Ptr + 5 )); } #define USE_FAST_BRANCHES 0 #define PSXCYCLE_MULT 17/16 static void iPsxBranchTest(u32 newpc, u32 cpuBranch) { if( !USE_FAST_BRANCHES || cpuBranch ) { MOV32MtoR(ECX, (uptr)&psxRegs.cycle); ADD32ItoR(ECX, s_psxBlockCycles*PSXCYCLE_MULT); // greater mult factor causes nfsmw to crash MOV32RtoM((uptr)&psxRegs.cycle, ECX); // update cycles } else { ADD32ItoM((uptr)&psxRegs.cycle, s_psxBlockCycles*PSXCYCLE_MULT); return; } SUB32MtoR(ECX, (uptr)&g_psxNextBranchCycle); // check if should branch j8Ptr[0] = JS8( 0 ); CALLFunc((uptr)psxBranchTest); CMP32ItoM((uptr)&EEsCycle, 0); j8Ptr[2] = JG8(0); if( REC_INC_STACK ) ADD64ItoR(ESP, REC_INC_STACK); RET2(); x86SetJ8( j8Ptr[2] ); if( newpc != 0xffffffff ) { CMP32ItoM((uptr)&psxRegs.pc, newpc); JNE32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 6 )); } x86SetJ8( j8Ptr[0] ); } static int *s_pCode; #if !defined(_MSC_VER) || !defined(__x86_64__) static void checkcodefn() { int pctemp; #ifdef _MSC_VER __asm mov pctemp, eax; #else __asm__("movl %%eax, %0" : : "m"(pctemp) ); #endif SysPrintf("iop code changed! %x\n", pctemp); } #endif void rpsxSYSCALL() { MOV32ItoM( (uptr)&psxRegs.code, psxRegs.code ); MOV32ItoM((uptr)&psxRegs.pc, psxpc - 4); _psxFlushCall(FLUSH_NODESTROY); _callFunctionArg2((uptr)psxException, MEM_CONSTTAG, MEM_CONSTTAG, 0x20, psxbranch==1); CMP32ItoM((uptr)&psxRegs.pc, psxpc-4); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, s_psxBlockCycles); JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //if (!psxbranch) psxbranch = 2; } extern "C" void psxBREAK(); void rpsxBREAK() { MOV32ItoM( (uptr)&psxRegs.code, psxRegs.code ); MOV32ItoM((uptr)&psxRegs.pc, psxpc - 4); _psxFlushCall(FLUSH_NODESTROY); _callFunctionArg2((uptr)psxBREAK, MEM_CONSTTAG, MEM_CONSTTAG, 0x24, psxbranch==1); CMP32ItoM((uptr)&psxRegs.pc, psxpc-4); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, s_psxBlockCycles); JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //if (!psxbranch) psxbranch = 2; } u32 psxRecompileCodeSafe(u32 temppc) { BASEBLOCK* pblock = PSX_GETBLOCK(temppc); if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { if( psxpc == pblock->startpc ) return 0; } return 1; } void psxRecompileNextInstruction(int delayslot) { static u8 s_bFlushReg = 1; BASEBLOCK* pblock = PSX_GETBLOCK(psxpc); // need *ppblock != s_pCurBlock because of branches if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { if( !delayslot && psxpc == pblock->startpc ) { // code already in place, so jump to it and exit recomp assert( PSX_GETBLOCKEX(pblock)->startpc == pblock->startpc ); _psxFlushCall(FLUSH_EVERYTHING); MOV32ItoM((uptr)&psxRegs.pc, psxpc); // if( pexblock->pOldFnptr ) { // // code already in place, so jump to it and exit recomp // JMP32((uptr)pexblock->pOldFnptr - ((uptr)x86Ptr + 5)); // branch = 3; // return; // } JMP32((uptr)pblock->pFnptr - ((uptr)x86Ptr + 5)); psxbranch = 3; return; } else { if( !(delayslot && pblock->startpc == psxpc) ) { s8* oldX86 = x86Ptr; //__Log("clear block %x\n", pblock->startpc); psxRecClearMem(pblock); x86Ptr = oldX86; if( delayslot ) SysPrintf("delay slot %x\n", psxpc); } } } if( delayslot ) pblock->uType = BLOCKTYPE_DELAYSLOT; #ifdef _DEBUG MOV32ItoR(EAX, psxpc); #endif s_pCode = (int *)PSXM( psxpc ); assert(s_pCode); psxRegs.code = *(int *)s_pCode; s_psxBlockCycles++; psxpc += 4; //#ifdef _DEBUG // CMP32ItoM((uptr)s_pCode, psxRegs.code); // j8Ptr[0] = JE8(0); // MOV32ItoR(EAX, psxpc); // CALLFunc((uptr)checkcodefn); // x86SetJ8( j8Ptr[ 0 ] ); //#endif g_pCurInstInfo++; // peephole optimizations if( g_pCurInstInfo->info & EEINSTINFO_COREC ) { assert(0); // recBSC_co[cpuRegs.code>>26](); // psxpc += 4; // s_psxBlockCycles++; // g_pCurInstInfo++; } else { assert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); rpsxBSC[ psxRegs.code >> 26 ](); } if( !delayslot ) { if( s_bFlushReg ) { //_psxFlushUnusedConstReg(); } else s_bFlushReg = 1; } else s_bFlushReg = 1; _clearNeededX86regs(); } static void recExecute() { for (;;) R3000AExecute(); } static void recExecuteBlock() { R3000AExecute(); } #include "PsxHw.h" extern "C" void iDumpPsxRegisters(u32 startpc, u32 temp) { int i; const char* pstr = temp ? "t" : ""; __Log("%spsxreg: %x %x ra:%x k0: %x %x\n", pstr, startpc, psxRegs.cycle, psxRegs.GPR.n.ra, psxRegs.GPR.n.k0, *(int*)PSXM(0x13c128)); for(i = 0; i < 34; i+=2) __Log("%spsx%s: %x %x\n", pstr, disRNameGPR[i], psxRegs.GPR.r[i], psxRegs.GPR.r[i+1]); __Log("%scycle: %x %x %x %x; counters %x %x\n", pstr, psxRegs.cycle, g_psxNextBranchCycle, EEsCycle, IOPoCycle, (uptr)psxNextsCounter, (uptr)psxNextCounter); __Log("psxdma%d c%x b%x m%x t%x\n", 9, HW_DMA9_CHCR, HW_DMA9_BCR, HW_DMA9_MADR, HW_DMA9_TADR); for(i = 0; i < 7; ++i) __Log("%scounter%d: %x %x %x\n", pstr, i, psxCounters[i].count, psxCounters[i].rate, psxCounters[i].sCycleT); // for(i = 0; i < 32; ++i) { // __Log("int%d: %x %x\n", i, psxRegs.sCycle[i], psxRegs.eCycle[i]); // } } void iDumpPsxRegisters(u32 startpc); static void printfn() { static int lastrec = 0; static int curcount = 0; const int skip = 0; //*(int*)PSXM(0x27990) = 1; // enables cdvd bios output for scph10000 if( psxRegs.cycle == 0x113a1be5 ) { // FILE* tempf = fopen("tempdmciop.txt", "wb"); // fwrite(PSXM(0), 0x200000, 1, tempf); // fclose(tempf); //psxdump |= 2; } // if( psxRegs.cycle == 0x114152d8 ) { // psxRegs.GPR.n.s0 = 0x55000; // } if( (psxdump&2) && lastrec != g_psxlastpc ) { curcount++; if( curcount > skip ) { iDumpPsxRegisters(g_psxlastpc, 1); curcount = 0; } lastrec = g_psxlastpc; } } u32 s_psxrecblocks[] = {0}; void psxRecRecompile(u32 startpc) { u32 i; u32 branchTo; u32 willbranch3 = 0; u32* ptr; #ifdef _DEBUG //psxdump |= 4; if( psxdump & 4 ) iDumpPsxRegisters(startpc, 0); #endif assert( startpc ); // if recPtr reached the mem limit reset whole mem if (((uptr)recPtr - (uptr)recMem) >= (RECMEM_SIZE - 0x10000)) recReset(); s_pCurBlock = PSX_GETBLOCK(startpc); if( s_pCurBlock->pFnptr ) { // clear if already taken assert( s_pCurBlock->startpc < startpc ); psxRecClearMem(s_pCurBlock); } if( s_pCurBlock->startpc == startpc ) { s_pCurBlockEx = PSX_GETBLOCKEX(s_pCurBlock); assert( s_pCurBlockEx->startpc == startpc ); } else { s_pCurBlockEx = NULL; for(i = 0; i < PSX_NUMBLOCKS; ++i) { if( recBlocks[(i+s_nNextBlock)%PSX_NUMBLOCKS].size == 0 ) { s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%PSX_NUMBLOCKS; s_nNextBlock = (i+s_nNextBlock+1)%PSX_NUMBLOCKS; break; } } if( s_pCurBlockEx == NULL ) { //SysPrintf("ee reset (blocks)\n"); recReset(); s_nNextBlock = 0; s_pCurBlockEx = recBlocks; } s_pCurBlockEx->startpc = startpc; } x86SetPtr( recPtr ); x86Align(16); recPtr = x86Ptr; psxbranch = 0; s_pCurBlock->startpc = startpc; s_pCurBlock->pFnptr = (u32)(uptr)x86Ptr; s_psxBlockCycles = 0; // reset recomp state variables psxpc = startpc; s_saveConstGPRreg = 0; g_psxHasConstReg = g_psxFlushedConstReg = 1; _initX86regs(); #ifdef _DEBUG // for debugging purposes MOV32ItoM((uptr)&g_psxlastpc, psxpc); CALLFunc((uptr)printfn); #endif // go until the next branch i = startpc; s_nEndBlock = 0xffffffff; while(1) { BASEBLOCK* pblock = PSX_GETBLOCK(i); if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { if( i == pblock->startpc ) { // branch = 3 willbranch3 = 1; s_nEndBlock = i; break; } } psxRegs.code = *(int *)PSXM(i); switch(psxRegs.code >> 26) { case 0: // special if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR s_nEndBlock = i + 8; goto StartRecomp; } break; case 1: // regimm if( _Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17 ) { branchTo = _Imm_ * 4 + i + 4; if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; else s_nEndBlock = i+8; goto StartRecomp; } break; case 2: // J case 3: // JAL s_nEndBlock = i + 8; goto StartRecomp; // branches case 4: case 5: case 6: case 7: branchTo = _Imm_ * 4 + i + 4; if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; else s_nEndBlock = i+8; goto StartRecomp; } i += 4; } StartRecomp: // rec info // { EEINST* pcur; if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { free(s_pInstCache); s_nInstCacheSize = (s_nEndBlock-startpc)/4+10; s_pInstCache = (EEINST*)malloc(sizeof(EEINST)*s_nInstCacheSize); assert( s_pInstCache != NULL ); } pcur = s_pInstCache + (s_nEndBlock-startpc)/4; _recClearInst(pcur); pcur->info = 0; for(i = s_nEndBlock; i > startpc; i -= 4 ) { psxRegs.code = *(int *)PSXM(i-4); pcur[-1] = pcur[0]; rpsxpropBSC(pcur-1, pcur); pcur--; } } // peephole optimizations // // { // g_pCurInstInfo = s_pInstCache; // // for(i = startpc; i < s_nEndBlock-4; i += 4) { // g_pCurInstInfo++; // if( psxRecompileCodeSafe(i) ) { // u32 curcode = *(u32*)PSXM(i); // u32 nextcode = *(u32*)PSXM(i+4); // if( _psxIsLoadStore(curcode) && _psxIsLoadStore(nextcode) && (curcode>>26) == (nextcode>>26) && rpsxBSC_co[curcode>>26] != NULL ) { // // // rs has to be the same, and cannot be just written // if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_psxLoadWritesRs(curcode) ) { // // // good enough // g_pCurInstInfo[0].info |= EEINSTINFO_COREC; // g_pCurInstInfo[0].numpeeps = 1; // g_pCurInstInfo[1].info |= EEINSTINFO_NOREC; // g_pCurInstInfo++; // i += 4; // continue; // } // } // } // } // } #ifdef _DEBUG // dump code for(i = 0; i < ARRAYSIZE(s_psxrecblocks); ++i) { if( startpc == s_psxrecblocks[i] ) { iDumpBlock(startpc, recPtr); } } if( (psxdump & 1) ) iDumpBlock(startpc, recPtr); #endif g_pCurInstInfo = s_pInstCache; while (!psxbranch && psxpc < s_nEndBlock) { psxRecompileNextInstruction(0); } #ifdef _DEBUG if( (psxdump & 1) ) iDumpBlock(startpc, recPtr); #endif assert( (psxpc-startpc)>>2 <= 0xffff ); s_pCurBlockEx->size = (psxpc-startpc)>>2; for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; s_pCurBlock[i].startpc = s_pCurBlock->startpc; } // don't overwrite if delay slot if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; s_pCurBlock[i].startpc = s_pCurBlock->startpc; } // set the block ptr AddBaseBlockEx(s_pCurBlockEx, 1); if( !(psxpc&0x10000000) ) g_psxMaxRecMem = max( (psxpc&~0xa0000000), g_psxMaxRecMem ); if( psxbranch == 2 ) { _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 5 )); } else { assert( psxbranch != 3 ); if( psxbranch ) assert( !willbranch3 ); else ADD32ItoM((uptr)&psxRegs.cycle, s_psxBlockCycles*PSXCYCLE_MULT); if( willbranch3 ) { BASEBLOCK* pblock = PSX_GETBLOCK(s_nEndBlock); assert( psxpc == s_nEndBlock ); _psxFlushCall(FLUSH_EVERYTHING); MOV32ItoM((uptr)&psxRegs.pc, psxpc); JMP32((uptr)pblock->pFnptr - ((uptr)x86Ptr + 5)); psxbranch = 3; } else if( !psxbranch ) { // didn't branch, but had to stop MOV32ItoM( (uptr)&psxRegs.pc, psxpc ); _psxFlushCall(FLUSH_EVERYTHING); ptr = JMP32(0); //JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 5 )); } } assert( x86Ptr >= (s8*)s_pCurBlock->pFnptr + EE_MIN_BLOCK_BYTES ); assert( x86Ptr < recMem+RECMEM_SIZE ); recPtr = x86Ptr; assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); if( !psxbranch ) { BASEBLOCK* pcurblock = s_pCurBlock; u32 nEndBlock = s_nEndBlock; s_pCurBlock = PSX_GETBLOCK(psxpc); assert( ptr != NULL ); if( s_pCurBlock->startpc != psxpc ) psxRecRecompile(psxpc); // could have reset if( pcurblock->startpc == startpc ) { assert( pcurblock->pFnptr ); assert( s_pCurBlock->startpc == nEndBlock ); *ptr = (u32)((uptr)s_pCurBlock->pFnptr - ( (uptr)ptr + 4 )); } else { psxRecRecompile(startpc); assert( pcurblock->pFnptr != 0 ); } } else assert( s_pCurBlock->pFnptr != 0 ); } R3000Acpu psxRec = { recInit, recReset, recExecute, recExecuteBlock, recClear, recShutdown }; #endif // PCSX2_NORECBUILD