/* Copyright (C) 2005 StrmnNrmn This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "BuildOptions.h" #include "Base/Types.h" #include #include #include #include "Config/ConfigOptions.h" #include "Core/CPU.h" #include "Core/Memory.h" #include "Core/R4300.h" #include "Core/Registers.h" #include "Core/ROM.h" #include "Debug/DBGConsole.h" #include "DynaRec/AssemblyUtils.h" #include "DynaRec/Trace.h" #include "Base/MathUtil.h" #include "Ultra/ultra_R4300.h" #include "SysPSP/DynaRec/CodeGeneratorPSP.h" #include "SysPSP/DynaRec/N64RegisterCachePSP.h" #include "Base/Macros.h" #include "Core/PrintOpCode.h" #include "Utility/Profiler.h" using namespace AssemblyUtils; //Enable unaligned load/store(used in CBFD, OOT, Rayman2 and PD) //Corn #define ENABLE_LWR_LWL //#define ENABLE_SWR_SWL //Enable to load/store floats directly to/from FPU //Corn #define ENABLE_LWC1 #define ENABLE_SWC1 //#define ENABLE_LDC1 //#define ENABLE_SDC1 //Define to handle full 64bit checks for SLT,SLTU,SLTI & SLTIU //Corn //#define ENABLE_64BIT //Define to check for DIV / 0 //Corn //#define DIVZEROCHK //Define to enable exceptions for interpreter calls from DYNAREC //#define ALLOW_INTERPRETER_EXCEPTIONS #define NOT_IMPLEMENTED( x ) DAEDALUS_ERROR( x ) extern "C" { const void * g_MemoryLookupTableReadForDynarec = g_MemoryLookupTableRead; } //Important pointer for Dynarec see DynaRecStubs.s extern "C" { void _DDIV( s64 Num, s32 Div ); } //signed 64bit division //Corn extern "C" { void _DDIVU( u64 Num, u32 Div ); } //unsigned 64bit division //Corn extern "C" { void _DMULTU( u64 A, u64 B ); } //unsigned 64bit multiply //Corn extern "C" { void _DMULT( s64 A, s64 B ); } //signed 64bit multiply (result is 64bit not 128bit!) //Corn extern "C" { u64 _FloatToDouble( u32 _float); } //Uses CPU to pass f64/32 thats why its maskerading as u64/32 //Corn extern "C" { u32 _DoubleToFloat( u64 _double); } //Uses CPU to pass f64/32 thats why its maskerading as u64/32 //Corn extern "C" { void _ReturnFromDynaRec(); } extern "C" { void _DirectExitCheckNoDelay( u32 instructions_executed, u32 exit_pc ); } extern "C" { void _DirectExitCheckDelay( u32 instructions_executed, u32 exit_pc, u32 target_pc ); } extern "C" { void _IndirectExitCheck( u32 instructions_executed, CIndirectExitMap * p_map, u32 target_pc ); } extern "C" { u32 _ReadBitsDirect_u8( u32 address, u32 current_pc ); u32 _ReadBitsDirect_s8( u32 address, u32 current_pc ); u32 _ReadBitsDirect_u16( u32 address, u32 current_pc ); u32 _ReadBitsDirect_s16( u32 address, u32 current_pc ); u32 _ReadBitsDirect_u32( u32 address, u32 current_pc ); u32 _ReadBitsDirectBD_u8( u32 address, u32 current_pc ); u32 _ReadBitsDirectBD_s8( u32 address, u32 current_pc ); u32 _ReadBitsDirectBD_u16( u32 address, u32 current_pc ); u32 _ReadBitsDirectBD_s16( u32 address, u32 current_pc ); u32 _ReadBitsDirectBD_u32( u32 address, u32 current_pc ); // Dynarec calls this for simplicity void Write32BitsForDynaRec( u32 address, u32 value ) { Write32Bits_NoSwizzle( address, value ); } void Write16BitsForDynaRec( u32 address, u16 value ) { Write16Bits_NoSwizzle( address, value ); } void Write8BitsForDynaRec( u32 address, u8 value ) { Write8Bits_NoSwizzle( address, value ); } void _WriteBitsDirect_u32( u32 address, u32 value, u32 current_pc ); void _WriteBitsDirect_u16( u32 address, u32 value, u32 current_pc ); // Value in low 16 bits void _WriteBitsDirect_u8( u32 address, u32 value, u32 current_pc ); // Value in low 8 bits void _WriteBitsDirectBD_u32( u32 address, u32 value, u32 current_pc ); void _WriteBitsDirectBD_u16( u32 address, u32 value, u32 current_pc ); // Value in low 16 bits void _WriteBitsDirectBD_u8( u32 address, u32 value, u32 current_pc ); // Value in low 8 bits } #define ReadBitsDirect_u8 _ReadBitsDirect_u8 #define ReadBitsDirect_s8 _ReadBitsDirect_s8 #define ReadBitsDirect_u16 _ReadBitsDirect_u16 #define ReadBitsDirect_s16 _ReadBitsDirect_s16 #define ReadBitsDirect_u32 _ReadBitsDirect_u32 #define ReadBitsDirectBD_u8 _ReadBitsDirectBD_u8 #define ReadBitsDirectBD_s8 _ReadBitsDirectBD_s8 #define ReadBitsDirectBD_u16 _ReadBitsDirectBD_u16 #define ReadBitsDirectBD_s16 _ReadBitsDirectBD_s16 #define ReadBitsDirectBD_u32 _ReadBitsDirectBD_u32 #define WriteBitsDirect_u32 _WriteBitsDirect_u32 #define WriteBitsDirect_u16 _WriteBitsDirect_u16 #define WriteBitsDirect_u8 _WriteBitsDirect_u8 #define WriteBitsDirectBD_u32 _WriteBitsDirectBD_u32 #define WriteBitsDirectBD_u16 _WriteBitsDirectBD_u16 #define WriteBitsDirectBD_u8 _WriteBitsDirectBD_u8 //Used to print value from ASM just add these two OPs (all caller saved regs will to be saved including RA and A0) //Corn // jal _printf_asm // lw $a0, _Delay($fp) #ex: prints Delay value // OR ADD // JAL( CCodeLabel( (void*)_printf_asm ), false ); // OR( PspReg_A0, dst_reg, PspReg_R0 ); extern "C" { void _printf_asm( u32 val ); } extern "C" { void output_extern( u32 val ) { printf("%d\n", val); } } extern "C" { void _ReturnFromDynaRecIfStuffToDo( u32 register_mask ); } extern "C" { void _DaedalusICacheInvalidate( const void * address, u32 length ); } bool gHaveSavedPatchedOps = false; PspOpCode gOriginalOps[2]; PspOpCode gReplacementOps[2]; #define URO_HI_SIGN_EXTEND 0 // Sign extend from src #define URO_HI_CLEAR 1 // Clear hi bits void Dynarec_ClearedCPUStuffToDo() { // Replace first two ops of _ReturnFromDynaRecIfStuffToDo with 'jr ra, nop' u8 * p_void_function( reinterpret_cast< u8 * >( _ReturnFromDynaRecIfStuffToDo ) ); PspOpCode * p_function_address = reinterpret_cast< PspOpCode * >( make_uncached_ptr( p_void_function ) ); if(!gHaveSavedPatchedOps) { gOriginalOps[0] = p_function_address[0]; gOriginalOps[1] = p_function_address[1]; PspOpCode op_code; op_code._u32 = 0; op_code.op = OP_SPECOP; op_code.spec_op = SpecOp_JR; op_code.rs = PspReg_RA; gReplacementOps[0] = op_code; // JR RA gReplacementOps[1]._u32 = 0; // NOP gHaveSavedPatchedOps = true; } p_function_address[0] = gReplacementOps[0]; p_function_address[1] = gReplacementOps[1]; const u8 * p_lower( RoundPointerDown( p_void_function, 64 ) ); const u8 * p_upper( RoundPointerUp( p_void_function + 8, 64 ) ); const u32 size( p_upper - p_lower); //sceKernelDcacheWritebackRange( p_lower, size ); //sceKernelIcacheInvalidateRange( p_lower, size ); _DaedalusICacheInvalidate( p_lower, size ); } void Dynarec_SetCPUStuffToDo() { // Restore first two ops of _ReturnFromDynaRecIfStuffToDo u8 * p_void_function( reinterpret_cast< u8 * >( _ReturnFromDynaRecIfStuffToDo ) ); PspOpCode * p_function_address = reinterpret_cast< PspOpCode * >( make_uncached_ptr( p_void_function ) ); p_function_address[0] = gOriginalOps[0]; p_function_address[1] = gOriginalOps[1]; const u8 * p_lower( RoundPointerDown( p_void_function, 64 ) ); const u8 * p_upper( RoundPointerUp( p_void_function + 8, 64 ) ); const u32 size( p_upper - p_lower); //sceKernelDcacheWritebackRange( p_lower, size ); //sceKernelIcacheInvalidateRange( p_lower, size ); _DaedalusICacheInvalidate( p_lower, size ); } extern "C" { void HandleException_extern() { switch (gCPUState.Delay) { case DO_DELAY: gCPUState.Delay = EXEC_DELAY; //fall through to PC +=4 case NO_DELAY: gCPUState.CurrentPC += 4; break; case EXEC_DELAY: gCPUState.CurrentPC = gCPUState.TargetPC; gCPUState.Delay = NO_DELAY; break; default: NODEFAULT; } } } namespace { const EPspReg gMemUpperBoundReg = PspReg_S6; const EPspReg gMemoryBaseReg = PspReg_S7; const EPspReg gRegistersToUseForCaching[] = { // PspReg_V0, // Used as calculation temp // PspReg_V1, // Used as calculation temp // PspReg_A0, // Used as calculation temp // PspReg_A1, // Used as calculation temp // PspReg_A2, // Used as calculation temp // PspReg_A3, // Used as calculation temp PspReg_AT, PspReg_T0, PspReg_T1, PspReg_T2, PspReg_T3, PspReg_T4, PspReg_T5, PspReg_T6, PspReg_T7, PspReg_T8, PspReg_T9, PspReg_S0, PspReg_S1, PspReg_S2, PspReg_S3, PspReg_S4, PspReg_S5, // PspReg_S6, // Memory upper bound // PspReg_S7, // Used for g_pu8RamBase - 0x80000000 // PspReg_S8, // Used for base register (&gCPUState) // PspReg_K0, //Used as load base register. Normally it is reserved for Kernel but seems to work if we borrow it...(could come back and bite us if we use kernel stuff?) //Corn // PspReg_K1, //Used as store base register. Normally it is reserved for Kernel but seems to work if we borrow it...(could come back and bite us if we use kernel stuff?) // PspReg_GP, //This needs saving to work? }; } //u32 gTotalRegistersCached = 0; //u32 gTotalRegistersUncached = 0; // CCodeGeneratorPSP::CCodeGeneratorPSP( CAssemblyBuffer * p_buffer_a, CAssemblyBuffer * p_buffer_b ) : CCodeGenerator( ) , CAssemblyWriterPSP( p_buffer_a, p_buffer_b ) , mpBasePointer( nullptr ) , mBaseRegister( PspReg_S8 ) // TODO , mEntryAddress( 0 ) , mLoopTop( nullptr ) , mUseFixedRegisterAllocation( false ) { } // void CCodeGeneratorPSP::Initialise( u32 entry_address, u32 exit_address, u32 * hit_counter, const void * p_base, const SRegisterUsageInfo & register_usage ) { mEntryAddress = entry_address; mpBasePointer = reinterpret_cast< const u8 * >( p_base ); SetRegisterSpanList( register_usage, entry_address == exit_address ); mPreviousLoadBase = N64Reg_R0; //Invalidate mPreviousStoreBase = N64Reg_R0; //Invalidate mFloatCMPIsValid = false; mMultIsValid = false; if( hit_counter != nullptr ) { GetVar( PspReg_V0, hit_counter ); ADDIU( PspReg_V0, PspReg_V0, 1 ); SetVar( hit_counter, PspReg_V0 ); } } // void CCodeGeneratorPSP::Finalise( ExceptionHandlerFn p_exception_handler_fn, const std::vector< CJumpLocation > & exception_handler_jumps ) { // We handle exceptions directly with _ReturnFromDynaRecIfStuffToDo - we should never get here on the psp #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( exception_handler_jumps.empty(), "Not expecting to have any exception handler jumps to process" ); #endif GenerateAddressCheckFixups(); CAssemblyWriterPSP::Finalise(); } // void CCodeGeneratorPSP::SetRegisterSpanList( const SRegisterUsageInfo & register_usage, bool loops_to_self ) { mRegisterSpanList = register_usage.SpanList; // Sort in order of increasing start point std::sort( mRegisterSpanList.begin(), mRegisterSpanList.end(), SAscendingSpanStartSort() ); const u32 NUM_CACHE_REGS( sizeof(gRegistersToUseForCaching) / sizeof(gRegistersToUseForCaching[0]) ); // Push all the available registers in reverse order (i.e. use temporaries later) // Use temporaries first so we can avoid flushing them in case of a funcion call //Corn #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( mAvailableRegisters.empty(), "Why isn't the available register list empty?" ); #endif for( u32 i {0}; i < NUM_CACHE_REGS; i++ ) { mAvailableRegisters.push( gRegistersToUseForCaching[ i ] ); } // Optimization for self looping code if( gDynarecLoopOptimisation & loops_to_self ) { mUseFixedRegisterAllocation = true; u32 cache_reg_idx( 0 ); u32 HiLo {0}; while ( HiLo<2 ) // If there are still unused registers, assign to high part of reg { RegisterSpanList::const_iterator span_it = mRegisterSpanList.begin(); while( span_it < mRegisterSpanList.end() ) { const SRegisterSpan & span( *span_it ); if( cache_reg_idx < NUM_CACHE_REGS ) { EPspReg cachable_reg( gRegistersToUseForCaching[cache_reg_idx] ); mRegisterCache.SetCachedReg( span.Register, HiLo, cachable_reg ); cache_reg_idx++; } ++span_it; } ++HiLo; } // // Pull all the cached registers into memory // // Skip r0 u32 i {1}; while( i < NUM_N64_REGS ) { EN64Reg n64_reg = EN64Reg( i ); u32 lo_hi_idx {}; while( lo_hi_idx < 2) { if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { PrepareCachedRegister( n64_reg, lo_hi_idx ); // // If the register is modified anywhere in the fragment, we need // to mark it as dirty so it's flushed correctly on exit. // if( register_usage.IsModified( n64_reg ) ) { mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true ); } } ++lo_hi_idx; } ++i; } mLoopTop = GetAssemblyBuffer()->GetLabel(); } //End of Loop optimization code } // void CCodeGeneratorPSP::ExpireOldIntervals( u32 instruction_idx ) { // mActiveIntervals is held in order of increasing end point for(RegisterSpanList::iterator span_it = mActiveIntervals.begin(); span_it < mActiveIntervals.end(); ++span_it ) { const SRegisterSpan & span( *span_it ); if( span.SpanEnd >= instruction_idx ) { break; } // This interval is no longer active - flush the register and return it to the list of available regs EPspReg psp_reg( mRegisterCache.GetCachedReg( span.Register, 0 ) ); FlushRegister( mRegisterCache, span.Register, 0, true ); mRegisterCache.ClearCachedReg( span.Register, 0 ); mAvailableRegisters.push( psp_reg ); span_it = mActiveIntervals.erase( span_it ); } } // void CCodeGeneratorPSP::SpillAtInterval( const SRegisterSpan & live_span ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( !mActiveIntervals.empty(), "There are no active intervals" ); #endif const SRegisterSpan & last_span( mActiveIntervals.back() ); // Spill the last active interval (it has the greatest end point) if( last_span.SpanEnd > live_span.SpanEnd ) { // Uncache the old span EPspReg psp_reg( mRegisterCache.GetCachedReg( last_span.Register, 0 ) ); FlushRegister( mRegisterCache, last_span.Register, 0, true ); mRegisterCache.ClearCachedReg( last_span.Register, 0 ); // Cache the new span mRegisterCache.SetCachedReg( live_span.Register, 0, psp_reg ); mActiveIntervals.pop_back(); // Remove the last span mActiveIntervals.push_back( live_span ); // Insert in order of increasing end point std::sort( mActiveIntervals.begin(), mActiveIntervals.end(), SAscendingSpanEndSort() ); // XXXX - will be quicker to insert in the correct place rather than sorting each time } else { // There is no space for this register - we just don't update the register cache info, so we save/restore it from memory as needed } } // void CCodeGeneratorPSP::UpdateRegisterCaching( u32 instruction_idx ) { if( !mUseFixedRegisterAllocation ) { ExpireOldIntervals( instruction_idx ); for(RegisterSpanList::const_iterator span_it = mRegisterSpanList.begin(); span_it < mRegisterSpanList.end(); ++span_it ) { const SRegisterSpan & span( *span_it ); // As we keep the intervals sorted in order of SpanStart, we can exit as soon as we encounter a SpanStart in the future if( instruction_idx < span.SpanStart ) { break; } // Only process live intervals if( (instruction_idx >= span.SpanStart) & (instruction_idx <= span.SpanEnd) ) { if( !mRegisterCache.IsCached( span.Register, 0 ) ) { if( mAvailableRegisters.empty() ) { SpillAtInterval( span ); } else { // Use this register for caching mRegisterCache.SetCachedReg( span.Register, 0, mAvailableRegisters.top() ); // Pop this register from the available list mAvailableRegisters.pop(); mActiveIntervals.push_back( span ); // Insert in order of increasing end point std::sort( mActiveIntervals.begin(), mActiveIntervals.end(), SAscendingSpanEndSort() ); // XXXX - will be quicker to insert in the correct place rather than sorting each time } } } } } } // RegisterSnapshotHandle CCodeGeneratorPSP::GetRegisterSnapshot() { RegisterSnapshotHandle handle( mRegisterSnapshots.size() ); mRegisterSnapshots.push_back( mRegisterCache ); return handle; } // CCodeLabel CCodeGeneratorPSP::GetEntryPoint() const { return GetAssemblyBuffer()->GetStartAddress(); } // CCodeLabel CCodeGeneratorPSP::GetCurrentLocation() const { return GetAssemblyBuffer()->GetLabel(); } // u32 CCodeGeneratorPSP::GetCompiledCodeSize() const { return GetAssemblyBuffer()->GetSize(); } //Get a (cached) N64 register mapped to a PSP register(usefull for dst register) EPspReg CCodeGeneratorPSP::GetRegisterNoLoad( EN64Reg n64_reg, u32 lo_hi_idx, EPspReg scratch_reg ) { if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { return mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ); } else { return scratch_reg; } } //Load value from an emulated N64 register or known value to a PSP register void CCodeGeneratorPSP::GetRegisterValue( EPspReg psp_reg, EN64Reg n64_reg, u32 lo_hi_idx ) { if( mRegisterCache.IsKnownValue( n64_reg, lo_hi_idx ) ) { //printf( "Loading %s[%d] <- %08x\n", RegNames[ n64_reg ], lo_hi_idx, mRegisterCache.GetKnownValue( n64_reg, lo_hi_idx ) ); LoadConstant( psp_reg, mRegisterCache.GetKnownValue( n64_reg, lo_hi_idx )._s32 ); if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true ); mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true ); mRegisterCache.ClearKnownValue( n64_reg, lo_hi_idx ); } } else { GetVar( psp_reg, lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0 ); } } //Get (cached) N64 register value mapped to a PSP register (or scratch reg) //and also load the value if not loaded yet(usefull for src register) EPspReg CCodeGeneratorPSP::GetRegisterAndLoad( EN64Reg n64_reg, u32 lo_hi_idx, EPspReg scratch_reg ) { EPspReg reg; bool need_load( false ); if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { // gTotalRegistersCached++; reg = mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ); // We're loading it below, so set the valid flag if( !mRegisterCache.IsValid( n64_reg, lo_hi_idx ) ) { need_load = true; mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true ); } } else if( n64_reg == N64Reg_R0 ) { reg = PspReg_R0; } else { // gTotalRegistersUncached++; reg = scratch_reg; need_load = true; } if( need_load ) { GetRegisterValue( reg, n64_reg, lo_hi_idx ); } return reg; } // Similar to GetRegisterAndLoad, but ALWAYS loads into the specified psp register void CCodeGeneratorPSP::LoadRegister( EPspReg psp_reg, EN64Reg n64_reg, u32 lo_hi_idx ) { if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { EPspReg cached_reg( mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ) ); // gTotalRegistersCached++; // Load the register if it's currently invalid if( !mRegisterCache.IsValid( n64_reg,lo_hi_idx ) ) { GetRegisterValue( cached_reg, n64_reg, lo_hi_idx ); mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true ); } // Copy the register if necessary if( psp_reg != cached_reg ) { OR( psp_reg, cached_reg, PspReg_R0 ); } } else if( n64_reg == N64Reg_R0 ) { OR( psp_reg, PspReg_R0, PspReg_R0 ); } else { // gTotalRegistersUncached++; GetRegisterValue( psp_reg, n64_reg, lo_hi_idx ); } } // This function pulls in a cached register so that it can be used at a later point. // This is usally done when we have a branching instruction - it guarantees that // the register is valid regardless of whether or not the branch is taken. void CCodeGeneratorPSP::PrepareCachedRegister( EN64Reg n64_reg, u32 lo_hi_idx ) { if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { EPspReg cached_reg( mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ) ); // Load the register if it's currently invalid if( !mRegisterCache.IsValid( n64_reg,lo_hi_idx ) ) { GetRegisterValue( cached_reg, n64_reg, lo_hi_idx ); mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true ); } } } // void CCodeGeneratorPSP::StoreRegister( EN64Reg n64_reg, u32 lo_hi_idx, EPspReg psp_reg ) { mRegisterCache.ClearKnownValue( n64_reg, lo_hi_idx ); if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { EPspReg cached_reg( mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ) ); // gTotalRegistersCached++; // Update our copy as necessary if( psp_reg != cached_reg ) { OR( cached_reg, psp_reg, PspReg_R0 ); } mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true ); mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true ); } else { // gTotalRegistersUncached++; SetVar( lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0, psp_reg ); mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, false ); } } // void CCodeGeneratorPSP::SetRegister64( EN64Reg n64_reg, s32 lo_value, s32 hi_value ) { SetRegister( n64_reg, 0, lo_value ); SetRegister( n64_reg, 1, hi_value ); } // Set the low 32 bits of a register to a known value (and hence the upper // 32 bits are also known though sign extension) inline void CCodeGeneratorPSP::SetRegister32s( EN64Reg n64_reg, s32 value ) { //SetRegister64( n64_reg, value, value >= 0 ? 0 : 0xffffffff ); SetRegister64( n64_reg, value, value >> 31 ); } // inline void CCodeGeneratorPSP::SetRegister( EN64Reg n64_reg, u32 lo_hi_idx, u32 value ) { mRegisterCache.SetKnownValue( n64_reg, lo_hi_idx, value ); mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true ); if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) ) { mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, false ); // The actual cache is invalid though! } } // void CCodeGeneratorPSP::UpdateRegister( EN64Reg n64_reg, EPspReg psp_reg, bool options ) { //if(n64_reg == N64Reg_R0) return; //Try to modify R0!!! StoreRegisterLo( n64_reg, psp_reg ); //Skip storing sign extension on some regs //Corn if( N64Reg_DontNeedSign( n64_reg ) ) return; if( options == URO_HI_SIGN_EXTEND ) { EPspReg scratch_reg = PspReg_V0; if( mRegisterCache.IsCached( n64_reg, 1 ) ) { scratch_reg = mRegisterCache.GetCachedReg( n64_reg, 1 ); } SRA( scratch_reg, psp_reg, 0x1f ); // Sign extend StoreRegisterHi( n64_reg, scratch_reg ); } else // == URO_HI_CLEAR { SetRegister( n64_reg, 1, 0 ); } } // EPspFloatReg CCodeGeneratorPSP::GetFloatRegisterAndLoad( EN64FloatReg n64_reg ) { EPspFloatReg psp_reg = EPspFloatReg( n64_reg ); // 1:1 mapping if( !mRegisterCache.IsFPValid( n64_reg ) ) { GetFloatVar( psp_reg, &gCPUState.FPU[n64_reg]._f32 ); mRegisterCache.MarkFPAsValid( n64_reg, true ); } mRegisterCache.MarkFPAsSim( n64_reg, false ); return psp_reg; } // inline void CCodeGeneratorPSP::UpdateFloatRegister( EN64FloatReg n64_reg ) { mRegisterCache.MarkFPAsDirty( n64_reg, true ); mRegisterCache.MarkFPAsValid( n64_reg, true ); mRegisterCache.MarkFPAsSim( n64_reg, false ); } //Get Double or SimDouble and add code to convert to simulated double if needed //Corn #define SIMULATESIG 0x1234 //Reduce signature to load value with one OP EPspFloatReg CCodeGeneratorPSP::GetSimFloatRegisterAndLoad( EN64FloatReg n64_reg ) { EPspFloatReg psp_reg_sig = EPspFloatReg( n64_reg ); // 1:1 mapping EPspFloatReg psp_reg = EPspFloatReg(n64_reg + 1); //This is Double Lo or signature if( !mRegisterCache.IsFPValid( n64_reg ) ) { GetFloatVar( psp_reg_sig, &gCPUState.FPU[n64_reg]._f32 ); mRegisterCache.MarkFPAsValid( n64_reg, true ); } //This is Double Hi or f32 if( !mRegisterCache.IsFPValid( EN64FloatReg(n64_reg + 1) ) ) { GetFloatVar( psp_reg, &gCPUState.FPU[n64_reg+1]._f32 ); mRegisterCache.MarkFPAsValid( EN64FloatReg(n64_reg+1), true ); } //If register is not SimDouble yet or unknown add check and conversion routine if( !mRegisterCache.IsFPSim( n64_reg ) ) { MFC1( PspReg_A0, psp_reg_sig ); //Get lo part of double LoadConstant( PspReg_A1, SIMULATESIG ); //Get signature CJumpLocation test_reg( BEQ( PspReg_A0, PspReg_A1, CCodeLabel(nullptr), false ) ); //compare float to signature MTC1( psp_reg_sig , PspReg_A1 ); //Always write back signature to float reg JAL( CCodeLabel( reinterpret_cast< const void * >( _DoubleToFloat ) ), false ); //Convert Double to Float MFC1( PspReg_A1, psp_reg ); //Get hi part of double MTC1( psp_reg , PspReg_V0 ); //store converted float PatchJumpLong( test_reg, GetAssemblyBuffer()->GetLabel() ); mRegisterCache.MarkFPAsDirty( n64_reg, true ); mRegisterCache.MarkFPAsSim( n64_reg, true ); mRegisterCache.MarkFPAsDirty( EN64FloatReg(n64_reg + 1), true ); } return psp_reg; } // inline void CCodeGeneratorPSP::UpdateSimDoubleRegister( EN64FloatReg n64_reg ) { mRegisterCache.MarkFPAsValid( n64_reg, true ); mRegisterCache.MarkFPAsDirty( n64_reg, true ); mRegisterCache.MarkFPAsSim( n64_reg, true ); mRegisterCache.MarkFPAsValid( EN64FloatReg(n64_reg + 1), true ); mRegisterCache.MarkFPAsDirty( EN64FloatReg(n64_reg + 1), true ); } // const CN64RegisterCachePSP & CCodeGeneratorPSP::GetRegisterCacheFromHandle( RegisterSnapshotHandle snapshot ) const { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( snapshot.Handle < mRegisterSnapshots.size(), "Invalid snapshot handle" ); #endif return mRegisterSnapshots[ snapshot.Handle ]; } // Flush a specific register back to memory if dirty. // Clears the dirty flag and invalidates the contents if specified void CCodeGeneratorPSP::FlushRegister( CN64RegisterCachePSP & cache, EN64Reg n64_reg, u32 lo_hi_idx, bool invalidate ) { if( cache.IsDirty( n64_reg, lo_hi_idx ) ) { if( cache.IsKnownValue( n64_reg, lo_hi_idx ) ) { s32 known_value( cache.GetKnownValue( n64_reg, lo_hi_idx )._s32 ); SetVar( lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0, known_value ); } else if( cache.IsCached( n64_reg, lo_hi_idx ) ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( cache.IsValid( n64_reg, lo_hi_idx ), "Register is dirty but not valid?" ); #endif EPspReg cached_reg( cache.GetCachedReg( n64_reg, lo_hi_idx ) ); SetVar( lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0, cached_reg ); } #ifdef DAEDALUS_DEBUG_CONSOLE else { DAEDALUS_ERROR( "Register is dirty, but not known or cached" ); } #endif // We're no longer dirty cache.MarkAsDirty( n64_reg, lo_hi_idx, false ); } // Invalidate the register, so we pick up any values the function might have changed if( invalidate ) { cache.ClearKnownValue( n64_reg, lo_hi_idx ); if( cache.IsCached( n64_reg, lo_hi_idx ) ) { cache.MarkAsValid( n64_reg, lo_hi_idx, false ); } } } // This function flushes all dirty registers back to memory // If the invalidate flag is set this also invalidates the known value/cached // register. This is primarily to ensure that we keep the register set // in a consistent set across calls to generic functions. Ideally we need // to reimplement generic functions with specialised code to avoid the flush. void CCodeGeneratorPSP::FlushAllRegisters( CN64RegisterCachePSP & cache, bool invalidate ) { mFloatCMPIsValid = false; //invalidate float compare register mMultIsValid = false; //Mult hi/lo are invalid // Skip r0 for( u32 i = 1; i < NUM_N64_REGS; i++ ) { EN64Reg n64_reg = EN64Reg( i ); FlushRegister( cache, n64_reg, 0, invalidate ); FlushRegister( cache, n64_reg, 1, invalidate ); } FlushAllFloatingPointRegisters( cache, invalidate ); } // Floating-point arguments are placed in $f12-$f15. // Floating-point return values go into $f0-$f1. // $f0-$f19 are caller-saved. // $f20-$f31 are callee-saved. void CCodeGeneratorPSP::FlushAllFloatingPointRegisters( CN64RegisterCachePSP & cache, bool invalidate ) { for( u32 i {0}; i < NUM_N64_FP_REGS; i++ ) { EN64FloatReg n64_reg = EN64FloatReg( i ); if( cache.IsFPDirty( n64_reg ) ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( cache.IsFPValid( n64_reg ), "Register is dirty but not valid?" ); #endif EPspFloatReg psp_reg = EPspFloatReg( n64_reg ); SetFloatVar( &gCPUState.FPU[n64_reg]._f32, psp_reg ); cache.MarkFPAsDirty( n64_reg, false ); } if( invalidate ) { // Invalidate the register, so we pick up any values the function might have changed cache.MarkFPAsValid( n64_reg, false ); cache.MarkFPAsSim( n64_reg, false ); } } } // This function flushes all dirty *temporary* registers back to memory, and // marks them as invalid. void CCodeGeneratorPSP::FlushAllTemporaryRegisters( CN64RegisterCachePSP & cache, bool invalidate ) { // Skip r0 for( u32 i {1}; i < NUM_N64_REGS; i++ ) { EN64Reg n64_reg = EN64Reg( i ); if( cache.IsTemporary( n64_reg, 0 ) ) { FlushRegister( cache, n64_reg, 0, invalidate ); } if( cache.IsTemporary( n64_reg, 1 ) ) { FlushRegister( cache, n64_reg, 1, invalidate ); } } // XXXX some fp regs are preserved across function calls? FlushAllFloatingPointRegisters( cache, invalidate ); } // void CCodeGeneratorPSP::RestoreAllRegisters( CN64RegisterCachePSP & current_cache, CN64RegisterCachePSP & new_cache ) { // Skip r0 for( u32 i {1}; i < NUM_N64_REGS; i++ ) { EN64Reg n64_reg = EN64Reg( i ); if( new_cache.IsValid( n64_reg, 0 ) && !current_cache.IsValid( n64_reg, 0 ) ) { GetVar( new_cache.GetCachedReg( n64_reg, 0 ), &gGPR[ n64_reg ]._u32_0 ); } if( new_cache.IsValid( n64_reg, 1 ) && !current_cache.IsValid( n64_reg, 1 ) ) { GetVar( new_cache.GetCachedReg( n64_reg, 1 ), &gGPR[ n64_reg ]._u32_1 ); } } // XXXX some fp regs are preserved across function calls? for( u32 i {0}; i < NUM_N64_FP_REGS; ++i ) { EN64FloatReg n64_reg = EN64FloatReg( i ); if( new_cache.IsFPValid( n64_reg ) && !current_cache.IsFPValid( n64_reg ) ) { EPspFloatReg psp_reg = EPspFloatReg( n64_reg ); GetFloatVar( psp_reg, &gCPUState.FPU[n64_reg]._f32 ); } } } // CJumpLocation CCodeGeneratorPSP::GenerateExitCode( u32 exit_address, u32 jump_address, u32 num_instructions, CCodeLabel next_fragment ) { #ifdef DAEDALUS_ENABLE_ASSERTS //DAEDALUS_ASSERT( exit_address != u32( ~0 ), "Invalid exit address" ); DAEDALUS_ASSERT( !next_fragment.IsSet() || jump_address == 0, "Shouldn't be specifying a jump address if we have a next fragment?" ); #endif if( (exit_address == mEntryAddress) & mLoopTop.IsSet() ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( mUseFixedRegisterAllocation, "Have mLoopTop but unfixed register allocation?" ); #endif FlushAllFloatingPointRegisters( mRegisterCache, false ); // Check if we're ok to continue, without flushing any registers GetVar( PspReg_V0, &gCPUState.CPUControl[C0_COUNT]._u32 ); GetVar( PspReg_A0, (const u32*)&gCPUState.Events[0].mCount ); // // Pull in any registers which may have been flushed for whatever reason. // // Skip r0 for( u32 i {1}; i < NUM_N64_REGS; i++ ) { EN64Reg n64_reg = EN64Reg( i ); //if( mRegisterCache.IsDirty( n64_reg, 0 ) & mRegisterCache.IsKnownValue( n64_reg, 0 ) ) //{ // FlushRegister( mRegisterCache, n64_reg, 0, false ); //} //if( mRegisterCache.IsDirty( n64_reg, 1 ) & mRegisterCache.IsKnownValue( n64_reg, 1 ) ) //{ // FlushRegister( mRegisterCache, n64_reg, 1, false ); //} PrepareCachedRegister( n64_reg, 0 ); PrepareCachedRegister( n64_reg, 1 ); } // Assuming we don't need to set CurrentPC/Delay flags before we branch to the top.. // ADDIU( PspReg_V0, PspReg_V0, num_instructions ); SetVar( &gCPUState.CPUControl[C0_COUNT]._u32, PspReg_V0 ); // // If the event counter is still positive, just jump directly to the top of our loop // ADDIU( PspReg_A0, PspReg_A0, -s16(num_instructions) ); BGTZ( PspReg_A0, mLoopTop, false ); SetVar( (u32*)&gCPUState.Events[0].mCount, PspReg_A0 ); // ASSUMES store is done in just a single op. FlushAllRegisters( mRegisterCache, true ); SetVar( &gCPUState.CurrentPC, exit_address ); JAL( CCodeLabel( reinterpret_cast< const void * >( CPU_HANDLE_COUNT_INTERRUPT ) ), false ); SetVar( &gCPUState.Delay, NO_DELAY ); // ASSUMES store is done in just a single op. J( CCodeLabel( reinterpret_cast< const void * >( _ReturnFromDynaRec ) ), true ); // // Return an invalid jump location to indicate we've handled our own linking. // return CJumpLocation( nullptr ); } FlushAllRegisters( mRegisterCache, true ); CCodeLabel exit_routine; // Use the branch delay slot to load the second argument PspOpCode op1; PspOpCode op2; if( jump_address != 0 ) { LoadConstant( PspReg_A0, num_instructions ); LoadConstant( PspReg_A1, exit_address ); GetLoadConstantOps( PspReg_A2, jump_address, &op1, &op2 ); exit_routine = CCodeLabel( reinterpret_cast< const void * >( _DirectExitCheckDelay ) ); } else { LoadConstant( PspReg_A0, num_instructions ); GetLoadConstantOps( PspReg_A1, exit_address, &op1, &op2 ); exit_routine = CCodeLabel( reinterpret_cast< const void * >( _DirectExitCheckNoDelay ) ); } if( op2._u32 == 0 ) { JAL( exit_routine, false ); // No branch delay AppendOp( op1 ); } else { AppendOp( op1 ); JAL( exit_routine, false ); // No branch delay AppendOp( op2 ); } // This jump may be nullptr, in which case we patch it below // This gets patched with a jump to the next fragment if the target is later found CJumpLocation jump_to_next_fragment( J( next_fragment, true ) ); // Patch up the exit jump if the target hasn't been compiled yet if( !next_fragment.IsSet() ) { PatchJumpLong( jump_to_next_fragment, CCodeLabel( reinterpret_cast< const void * >( _ReturnFromDynaRec ) ) ); } return jump_to_next_fragment; } // Handle branching back to the interpreter after an ERET void CCodeGeneratorPSP::GenerateEretExitCode( u32 num_instructions, CIndirectExitMap * p_map ) { FlushAllRegisters( mRegisterCache, false ); // Eret is a bit bodged so we exit at PC + 4 LoadConstant( PspReg_A0, num_instructions ); LoadConstant( PspReg_A1, reinterpret_cast< s32 >( p_map ) ); GetVar( PspReg_A2, &gCPUState.CurrentPC ); J( CCodeLabel( reinterpret_cast< const void * >( _IndirectExitCheck ) ), false ); ADDIU( PspReg_A2, PspReg_A2, 4 ); // Delay slot } // Handle branching back to the interpreter after an indirect jump void CCodeGeneratorPSP::GenerateIndirectExitCode( u32 num_instructions, CIndirectExitMap * p_map ) { FlushAllRegisters( mRegisterCache, false ); // New return address is in gCPUState.TargetPC PspOpCode op1, op2; GetLoadConstantOps(PspReg_A0, num_instructions, &op1, &op2); LoadConstant( PspReg_A1, reinterpret_cast< s32 >( p_map ) ); GetVar( PspReg_A2, &gCPUState.TargetPC ); if (op2._u32 == 0) { J( CCodeLabel( reinterpret_cast< const void * >( _IndirectExitCheck ) ), false ); AppendOp(op1); } else { AppendOp(op1); J( CCodeLabel( reinterpret_cast< const void * >( _IndirectExitCheck ) ), false ); AppendOp(op2); } } // void CCodeGeneratorPSP::GenerateBranchHandler( CJumpLocation branch_handler_jump, RegisterSnapshotHandle snapshot ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( branch_handler_jump.IsSet(), "Why is the branch handler jump not set?" ); #endif CCodeGeneratorPSP::SetBufferA(); CCodeLabel current_label( GetAssemblyBuffer()->GetLabel() ); PatchJumpLong( branch_handler_jump, current_label ); mRegisterCache = GetRegisterCacheFromHandle( snapshot ); CJumpLocation jump_to_b( J( CCodeLabel( nullptr ), true ) ); CCodeGeneratorPSP::SetBufferB(); current_label = GetAssemblyBuffer()->GetLabel(); PatchJumpLong( jump_to_b, current_label ); } // CJumpLocation CCodeGeneratorPSP::GenerateBranchAlways( CCodeLabel target ) { return J( target, true ); } // CJumpLocation CCodeGeneratorPSP::GenerateBranchIfSet( const u32 * p_var, CCodeLabel target ) { GetVar( PspReg_V0, p_var ); return BNE( PspReg_V0, PspReg_R0, target, true ); } // CJumpLocation CCodeGeneratorPSP::GenerateBranchIfNotSet( const u32 * p_var, CCodeLabel target ) { GetVar( PspReg_V0, p_var ); return BEQ( PspReg_V0, PspReg_R0, target, true ); } // CJumpLocation CCodeGeneratorPSP::GenerateBranchIfEqual( const u32 * p_var, u32 value, CCodeLabel target ) { GetVar( PspReg_V0, p_var ); LoadConstant( PspReg_A0, value ); return BEQ( PspReg_V0, PspReg_A0, target, true ); } // CJumpLocation CCodeGeneratorPSP::GenerateBranchIfNotEqual( const u32 * p_var, u32 value, CCodeLabel target ) { GetVar( PspReg_V0, p_var ); LoadConstant( PspReg_A0, value ); return BNE( PspReg_V0, PspReg_A0, target, true ); } // CJumpLocation CCodeGeneratorPSP::GenerateBranchIfNotEqual( EPspReg reg_a, u32 value, CCodeLabel target ) { EPspReg reg_b( reg_a == PspReg_V0 ? PspReg_A0 : PspReg_V0 ); // Make sure we don't use the same reg LoadConstant( reg_b, value ); return BNE( reg_a, reg_b, target, true ); } // void CCodeGeneratorPSP::SetVar( u32 * p_var, EPspReg reg_src ) { //DAEDALUS_ASSERT( reg_src != PspReg_AT, "Whoops, splattering source register" ); EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_var, ®_base, &base_offset ); SW( reg_src, reg_base, base_offset ); } // void CCodeGeneratorPSP::SetVar( u32 * p_var, u32 value ) { EPspReg reg_value; if(value == 0) { reg_value = PspReg_R0; } else { LoadConstant( PspReg_V0, value ); reg_value = PspReg_V0; } EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_var, ®_base, &base_offset ); SW( reg_value, reg_base, base_offset ); } // void CCodeGeneratorPSP::SetFloatVar( f32 * p_var, EPspFloatReg reg_src ) { EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_var, ®_base, &base_offset ); SWC1( reg_src, reg_base, base_offset ); } // void CCodeGeneratorPSP::GetVar( EPspReg dst_reg, const u32 * p_var ) { EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_var, ®_base, &base_offset ); LW( dst_reg, reg_base, base_offset ); } // void CCodeGeneratorPSP::GetFloatVar( EPspFloatReg dst_reg, const f32 * p_var ) { EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_var, ®_base, &base_offset ); LWC1( dst_reg, reg_base, base_offset ); } // void CCodeGeneratorPSP::GetBaseRegisterAndOffset( const void * p_address, EPspReg * p_reg, s16 * p_offset ) { s32 base_pointer_offset( reinterpret_cast< const u8 * >( p_address ) - mpBasePointer ); if( (base_pointer_offset > SHRT_MIN) & (base_pointer_offset < SHRT_MAX) ) { *p_reg = mBaseRegister; *p_offset = base_pointer_offset; } else { u32 address( reinterpret_cast< u32 >( p_address ) ); u16 hi_bits( (u16)( address >> 16 ) ); u16 lo_bits( (u16)( address ) ); // // Move up // /*if(lo_bits >= 0x8000) { hi_bits++; }*/ hi_bits += lo_bits >> 15; s32 long_offset( address - ((s32)hi_bits<<16) ); #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( long_offset >= SHRT_MIN && long_offset <= SHRT_MAX, "Offset is out of range!" ); #endif s16 offset( (s16)long_offset ); #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( ((s32)hi_bits<<16) + offset == (s32)address, "Incorrect address calculation" ); #endif LUI( PspReg_A0, hi_bits ); *p_reg = PspReg_A0; *p_offset = offset; } } // /*void CCodeGeneratorPSP::UpdateAddressAndDelay( u32 address, bool set_branch_delay ) { DAEDALUS_ASSERT( !set_branch_delay || (address != 0), "Have branch delay and no address?" ); if( set_branch_delay ) { SetVar( &gCPUState.Delay, EXEC_DELAY ); mBranchDelaySet = true; } //else //{ // SetVar( &gCPUState.Delay, NO_DELAY ); //} if( address != 0 ) { SetVar( &gCPUState.CurrentPC, address ); } }*/ // Generates instruction handler for the specified op code. // Returns a jump location if an exception handler is required CJumpLocation CCodeGeneratorPSP::GenerateOpCode( const STraceEntry& ti, bool branch_delay_slot, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_PROFILE DAEDALUS_PROFILE( "CCodeGeneratorPSP::GenerateOpCode" ); #endif u32 address = ti.Address; OpCode op_code = ti.OpCode; bool handled( false ); bool is_nop( op_code._u32 == 0 ); if( is_nop ) { if( branch_delay_slot ) { SetVar( &gCPUState.Delay, NO_DELAY ); } return CJumpLocation(); } if( branch_delay_slot ) mPreviousStoreBase = mPreviousLoadBase = N64Reg_R0; //Invalidate mQuickLoad = ti.Usage.Access8000; const EN64Reg rs = EN64Reg( op_code.rs ); const EN64Reg rt = EN64Reg( op_code.rt ); const EN64Reg rd = EN64Reg( op_code.rd ); const EN64Reg base = EN64Reg( op_code.base ); const u32 ft = op_code.ft; const u32 sa = op_code.sa; //const u32 jump_target( (address&0xF0000000) | (op_code.target<<2) ); //const u32 branch_target( address + ( ((s32)(s16)op_code.immediate)<<2 ) + 4); // // Look for opcodes we can handle manually // switch( op_code.op ) { case OP_SPECOP: switch( op_code.spec_op ) { case SpecOp_SLL: GenerateSLL( rd, rt, sa ); handled = true; break; case SpecOp_SRA: GenerateSRA( rd, rt, sa ); handled = true; break; case SpecOp_SRL: GenerateSRL( rd, rt, sa ); handled = true; break; case SpecOp_SLLV: GenerateSLLV( rd, rs, rt ); handled = true; break; case SpecOp_SRAV: GenerateSRAV( rd, rs, rt ); handled = true; break; case SpecOp_SRLV: GenerateSRLV( rd, rs, rt ); handled = true; break; case SpecOp_JR: GenerateJR( rs, p_branch, p_branch_jump ); handled = true; break; case SpecOp_JALR: GenerateJALR( rs, rd, address, p_branch, p_branch_jump ); handled = true; break; case SpecOp_MFLO: GenerateMFLO( rd ); handled = true; break; case SpecOp_MFHI: GenerateMFHI( rd ); handled = true; break; case SpecOp_MTLO: GenerateMTLO( rd ); handled = true; break; case SpecOp_MTHI: GenerateMTHI( rd ); handled = true; break; case SpecOp_MULT: GenerateMULT( rs, rt ); handled = true; break; case SpecOp_MULTU: GenerateMULTU( rs, rt ); handled = true; break; case SpecOp_DIV: GenerateDIV( rs, rt ); handled = true; break; case SpecOp_DIVU: GenerateDIVU( rs, rt ); handled = true; break; case SpecOp_DMULT: GenerateDMULT( rs, rt ); handled = true; break; case SpecOp_DMULTU: GenerateDMULTU( rs, rt ); handled = true; break; case SpecOp_DDIVU: GenerateDDIVU( rs, rt ); handled = true; break; case SpecOp_DDIV: GenerateDDIV( rs, rt ); handled = true; break; case SpecOp_ADD: GenerateADDU( rd, rs, rt ); handled = true; break; case SpecOp_ADDU: GenerateADDU( rd, rs, rt ); handled = true; break; case SpecOp_SUB: GenerateSUBU( rd, rs, rt ); handled = true; break; case SpecOp_SUBU: GenerateSUBU( rd, rs, rt ); handled = true; break; case SpecOp_AND: GenerateAND( rd, rs, rt ); handled = true; break; case SpecOp_OR: GenerateOR( rd, rs, rt ); handled = true; break; case SpecOp_XOR: GenerateXOR( rd, rs, rt ); handled = true; break; case SpecOp_NOR: GenerateNOR( rd, rs, rt ); handled = true; break; case SpecOp_SLT: GenerateSLT( rd, rs, rt ); handled = true; break; case SpecOp_SLTU: GenerateSLTU( rd, rs, rt ); handled = true; break; case SpecOp_DADD: GenerateDADDU( rd, rs, rt ); handled = true; break; case SpecOp_DADDU: GenerateDADDU( rd, rs, rt ); handled = true; break; case SpecOp_DSRA32: GenerateDSRA32( rd, rt, sa ); handled = true; break; case SpecOp_DSRA: GenerateDSRA( rd, rt, sa ); handled = true; break; case SpecOp_DSLL32: GenerateDSLL32( rd, rt, sa ); handled = true; break; case SpecOp_DSLL: GenerateDSLL( rd, rt, sa ); handled = true; break; default: break; } break; case OP_REGIMM: switch( op_code.regimm_op ) { // These can be handled by the same Generate function, as the 'likely' bit is handled elsewhere case RegImmOp_BLTZ: case RegImmOp_BLTZL: GenerateBLTZ( rs, p_branch, p_branch_jump ); handled = true; break; case RegImmOp_BGEZ: case RegImmOp_BGEZL: GenerateBGEZ( rs, p_branch, p_branch_jump ); handled = true; break; } break; case OP_J: /* nothing to do */ handled = true; break; case OP_JAL: GenerateJAL( address ); handled = true; break; case OP_BEQ: GenerateBEQ( rs, rt, p_branch, p_branch_jump ); handled = true; break; case OP_BEQL: GenerateBEQ( rs, rt, p_branch, p_branch_jump ); handled = true; break; case OP_BNE: GenerateBNE( rs, rt, p_branch, p_branch_jump ); handled = true; break; case OP_BNEL: GenerateBNE( rs, rt, p_branch, p_branch_jump ); handled = true; break; case OP_BLEZ: GenerateBLEZ( rs, p_branch, p_branch_jump ); handled = true; break; case OP_BLEZL: GenerateBLEZ( rs, p_branch, p_branch_jump ); handled = true; break; case OP_BGTZ: GenerateBGTZ( rs, p_branch, p_branch_jump ); handled = true; break; case OP_BGTZL: GenerateBGTZ( rs, p_branch, p_branch_jump ); handled = true; break; case OP_ADDI: GenerateADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break; case OP_ADDIU: GenerateADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break; case OP_SLTI: GenerateSLTI( rt, rs, s16( op_code.immediate ) ); handled = true; break; case OP_SLTIU: GenerateSLTIU( rt, rs, s16( op_code.immediate ) ); handled = true; break; case OP_DADDI: GenerateDADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break; case OP_DADDIU: GenerateDADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break; case OP_ANDI: GenerateANDI( rt, rs, op_code.immediate ); handled = true; break; case OP_ORI: GenerateORI( rt, rs, op_code.immediate ); handled = true; break; case OP_XORI: GenerateXORI( rt, rs, op_code.immediate ); handled = true; break; case OP_LUI: GenerateLUI( rt, s16( op_code.immediate ) ); handled = true; break; case OP_LB: GenerateLB( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LBU: GenerateLBU( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LH: GenerateLH( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LHU: GenerateLHU( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LW: GenerateLW( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LD: GenerateLD( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LWC1: GenerateLWC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; break; #ifdef ENABLE_LDC1 case OP_LDC1: if( !branch_delay_slot & gMemoryAccessOptimisation & mQuickLoad ) { GenerateLDC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; } break; #endif #ifdef ENABLE_LWR_LWL case OP_LWL: GenerateLWL( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_LWR: GenerateLWR( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; #endif case OP_SB: GenerateSB( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_SH: GenerateSH( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_SW: GenerateSW( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_SD: GenerateSD( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_SWC1: GenerateSWC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; break; #ifdef ENABLE_SDC1 case OP_SDC1: if( !branch_delay_slot & gMemoryAccessOptimisation & mQuickLoad ) { GenerateSDC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; } break; #endif #ifdef ENABLE_SWR_SWL case OP_SWL: GenerateSWL( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; case OP_SWR: GenerateSWR( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break; #endif case OP_CACHE: GenerateCACHE( base, op_code.immediate, rt ); handled = true; break; case OP_COPRO0: switch( op_code.cop0_op ) { case Cop0Op_MFC0: GenerateMFC0( rt, op_code.fs ); handled = true; break; //case Cop0Op_MTC0: GenerateMTC0( rt, op_code.fs ); handled = true; break; //1080 deg has issues with this default: break; } break; case OP_COPRO1: switch( op_code.cop1_op ) { case Cop1Op_MFC1: GenerateMFC1( rt, op_code.fs ); handled = true; break; case Cop1Op_MTC1: GenerateMTC1( op_code.fs, rt ); handled = true; break; case Cop1Op_CFC1: GenerateCFC1( rt, op_code.fs ); handled = true; break; case Cop1Op_CTC1: GenerateCTC1( op_code.fs, rt ); handled = true; break; case Cop1Op_DInstr: if( gDynarecDoublesOptimisation ) { switch( op_code.cop1_funct ) { case Cop1OpFunc_ADD: GenerateADD_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_SUB: GenerateSUB_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_MUL: GenerateMUL_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_DIV: GenerateDIV_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_SQRT: GenerateSQRT_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_ABS: GenerateABS_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_MOV: GenerateMOV_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_NEG: GenerateNEG_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_TRUNC_W: GenerateTRUNC_W_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CVT_W: GenerateCVT_W_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CVT_S: GenerateCVT_S_D_Sim( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CMP_F: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_F, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_UN: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_UN, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_EQ: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_EQ, op_code.ft ); handled = true; break; //Conker has issues with this case Cop1OpFunc_CMP_UEQ: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_UEQ, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_OLT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_OLT, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_ULT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_ULT, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_OLE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_OLE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_ULE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_ULE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_SF: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_SF, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGLE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGLE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_SEQ: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_SEQ, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGL: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGL, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_LT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_LT, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_LE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_LE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGT, op_code.ft ); handled = true; break; default: break; //call Generic4300 } } else { GenerateGenericR4300( op_code, R4300_GetDInstructionHandler( op_code ) ); handled = true; break; // Need branch delay? } break; case Cop1Op_SInstr: switch( op_code.cop1_funct ) { case Cop1OpFunc_ADD: GenerateADD_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_SUB: GenerateSUB_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_MUL: GenerateMUL_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_DIV: GenerateDIV_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break; case Cop1OpFunc_SQRT: GenerateSQRT_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_ABS: GenerateABS_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_MOV: GenerateMOV_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_NEG: GenerateNEG_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_TRUNC_W: GenerateTRUNC_W_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_FLOOR_W: GenerateFLOOR_W_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CVT_W: GenerateCVT_W_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CVT_D: if( gDynarecDoublesOptimisation & !g_ROM.DISABLE_SIM_CVT_D_S ) GenerateCVT_D_S_Sim( op_code.fd, op_code.fs ); //Sim has issues with EWJ/Tom&Jerry/PowerPuffGirls else GenerateCVT_D_S( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CMP_F: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_F, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_UN: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_UN, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_EQ: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_EQ, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_UEQ: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_UEQ, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_OLT: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_OLT, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_ULT: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_ULT, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_OLE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_OLE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_ULE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_ULE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_SF: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_SF, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGLE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGLE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_SEQ: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_SEQ, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGL: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGL, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_LT: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_LT, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_NGE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGE, op_code.ft ); handled = true; break; case Cop1OpFunc_CMP_LE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_LE, op_code.ft ); handled = true; break; //This breaks D64, but I think is a bug somewhere else since interpreter trows fp nan exception in Cop1_S_NGT case Cop1OpFunc_CMP_NGT: if(g_ROM.GameHacks != DK64) {GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGT, op_code.ft ); handled = true;} break; } break; case Cop1Op_WInstr: switch( op_code.cop1_funct ) { case Cop1OpFunc_CVT_S: GenerateCVT_S_W( op_code.fd, op_code.fs ); handled = true; break; case Cop1OpFunc_CVT_D: if( gDynarecDoublesOptimisation ) { GenerateCVT_D_W_Sim( op_code.fd, op_code.fs ); handled = true; } break; } break; case Cop1Op_BCInstr: switch( op_code.cop1_bc ) { // These can be handled by the same Generate function, as the 'likely' bit is handled elsewhere case Cop1BCOp_BC1F: case Cop1BCOp_BC1FL: GenerateBC1F( p_branch, p_branch_jump ); handled = true; break; case Cop1BCOp_BC1T: case Cop1BCOp_BC1TL: GenerateBC1T( p_branch, p_branch_jump ); handled = true; break; } break; default: break; } break; } //Invalidate load/store base regs if they has been modified with current OPcode //Corn if( (ti.Usage.RegWrites >> mPreviousLoadBase) & 1 ) mPreviousLoadBase = N64Reg_R0; //Invalidate if( (ti.Usage.RegWrites >> mPreviousStoreBase) & 1 ) mPreviousStoreBase = N64Reg_R0; //Invalidate // Default handling - call interpreting function // if( !handled ) { #if 0 //1->Show not handled OP codes (Require that DAEDALUS_SILENT flag is undefined) // Note: Cop1Op_DInstr are handled elsewhere! char msg[128]; SprintOpCodeInfo( msg, address, op_code ); printf( "Unhandled: 0x%08x %s\n", address, msg ); #endif bool BranchDelaySet = false; if( R4300_InstructionHandlerNeedsPC( op_code ) ) { //Generate exception handler // SetVar( &gCPUState.CurrentPC, address ); if( branch_delay_slot ) { SetVar( &gCPUState.Delay, EXEC_DELAY ); BranchDelaySet = true; } GenerateGenericR4300( op_code, R4300_GetInstructionHandler( op_code ) ); #ifdef ALLOW_INTERPRETER_EXCEPTIONS // Make sure all dirty registers are flushed. NB - we don't invalidate them // to avoid reloading the contents if no exception was thrown. FlushAllRegisters( mRegisterCache, false ); JAL( CCodeLabel( reinterpret_cast< const void * >( _ReturnFromDynaRecIfStuffToDo ) ), false ); ORI( PspReg_A0, PspReg_R0, 0 ); #endif } else { GenerateGenericR4300( op_code, R4300_GetInstructionHandler( op_code ) ); } // Check whether we want to invert the status of this branch if( p_branch != nullptr ) { CCodeLabel no_target( nullptr ); // // Check if the branch has been taken // if( p_branch->Direct ) { if( p_branch->ConditionalBranchTaken ) { *p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.Delay, DO_DELAY, no_target ); } else { *p_branch_jump = GenerateBranchIfEqual( &gCPUState.Delay, DO_DELAY, no_target ); } } else { // XXXX eventually just exit here, and skip default exit code below if( p_branch->Eret ) { *p_branch_jump = GenerateBranchAlways( no_target ); } else { *p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.TargetPC, p_branch->TargetAddress, no_target ); } } } if( BranchDelaySet ) { SetVar( &gCPUState.Delay, NO_DELAY ); } } return CJumpLocation(); } // // NB - This assumes that the address and branch delay have been set up before // calling this function. // void CCodeGeneratorPSP::GenerateGenericR4300( OpCode op_code, CPU_Instruction p_instruction ) { mPreviousLoadBase = N64Reg_R0; //Invalidate mPreviousStoreBase = N64Reg_R0; //Invalidate // Flush out the dirty registers, and mark them all as invalid // Theoretically we could just flush the registers referenced in the call FlushAllRegisters( mRegisterCache, true ); PspOpCode op1; PspOpCode op2; // Load the opcode in to A0 GetLoadConstantOps( PspReg_A0, op_code._u32, &op1, &op2 ); if( op2._u32 == 0 ) { JAL( CCodeLabel( reinterpret_cast< const void * >( p_instruction ) ), false ); // No branch delay AppendOp( op1 ); } else { AppendOp( op1 ); JAL( CCodeLabel( reinterpret_cast< const void * >( p_instruction ) ), false ); // No branch delay AppendOp( op2 ); } } // CJumpLocation CCodeGeneratorPSP::ExecuteNativeFunction( CCodeLabel speed_hack, bool check_return ) { JAL( speed_hack, true ); if( check_return ) return BEQ(PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true); else return CJumpLocation(nullptr); } // inline bool CCodeGeneratorPSP::GenerateDirectLoad( EPspReg psp_dst, EN64Reg base, s16 offset, OpCodeValue load_op, u32 swizzle ) { if(mRegisterCache.IsKnownValue( base, 0 )) { u32 base_address( mRegisterCache.GetKnownValue( base, 0 )._u32 ); u32 address( (base_address + s32( offset )) ^ swizzle ); if( load_op == OP_LWL ) { load_op = OP_LW; u32 shift = address & 3; address ^= shift; //Zero low 2 bits in address ADDIU( PspReg_A3, PspReg_R0, shift); //copy low 2 bits to A3 } const MemFuncRead & m( g_MemoryLookupTableRead[ address >> 18 ] ); if( m.pRead ) { void * p_memory( (void*)( m.pRead + address ) ); //printf( "Loading from %s %08x + %04x (%08x) op %d\n", RegNames[ base ], base_address, u16(offset), address, load_op ); EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_memory, ®_base, &base_offset ); CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, reg_base, base_offset ); return true; } else { //printf( "Loading from %s %08x + %04x (%08x) - unhandled\n", RegNames[ base ], base_address, u16(offset), address ); } } return false; } // void CCodeGeneratorPSP::GenerateSlowLoad( u32 current_pc, EPspReg psp_dst, EPspReg reg_address, ReadMemoryFunction p_read_memory ) { // We need to flush all the regular registers AND invalidate all the temp regs. // NB: When we can flush registers through some kind of handle (i.e. when the // exception handling branch is taken), this won't be necessary CN64RegisterCachePSP current_regs( mRegisterCache ); FlushAllRegisters( mRegisterCache, false ); FlushAllTemporaryRegisters( mRegisterCache, true ); // // // if( reg_address != PspReg_A0 ) { OR( PspReg_A0, reg_address, PspReg_R0 ); } PspOpCode op1, op2; GetLoadConstantOps( PspReg_A1, current_pc, &op1, &op2 ); if( op2._u32 == 0 ) { JAL( CCodeLabel( reinterpret_cast< const void * >( p_read_memory ) ), false ); AppendOp( op1 ); } else { AppendOp( op1 ); JAL( CCodeLabel( reinterpret_cast< const void * >( p_read_memory ) ), false ); AppendOp( op2 ); } // Restore all registers BEFORE copying back final value RestoreAllRegisters( mRegisterCache, current_regs ); // Copy return value to the destination register if we're not using V0 if( psp_dst != PspReg_V0 ) { OR( psp_dst, PspReg_V0, PspReg_R0 ); } // Restore the state of the registers mRegisterCache = current_regs; } // void CCodeGeneratorPSP::GenerateLoad( u32 current_pc, EPspReg psp_dst, EN64Reg n64_base, s16 offset, OpCodeValue load_op, u32 swizzle, ReadMemoryFunction p_read_memory ) { // // Check if the base pointer is a known value, in which case we can load directly. // if(GenerateDirectLoad( psp_dst, n64_base, offset, load_op, swizzle )) { return; } EPspReg reg_base( GetRegisterAndLoadLo( n64_base, PspReg_A0 ) ); EPspReg reg_address( reg_base ); if( load_op == OP_LWL ) //We handle both LWL & LWR here { if(offset != 0) { ADDIU( PspReg_A0, reg_address, offset ); //base + offset reg_address = PspReg_A0; offset = 0; } ANDI( PspReg_A3, reg_address, 3 ); //copy two LSB bits to A3, used later for mask XOR( PspReg_A0, reg_address, PspReg_A3); //zero two LSB bits in address //Dont cache the current pointer to K0 reg because the address is mangled by the XOR if( (n64_base == N64Reg_SP) | (gMemoryAccessOptimisation & mQuickLoad) ) { ADDU( PspReg_A0, PspReg_A0, gMemoryBaseReg ); CAssemblyWriterPSP::LoadRegister( psp_dst, OP_LW, PspReg_A0, 0 ); return; } load_op = OP_LW; reg_address = PspReg_A0; } else { if( (n64_base == N64Reg_SP) | (gMemoryAccessOptimisation & mQuickLoad) ) { if( swizzle != 0 ) { if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; offset = 0; } XORI( PspReg_A0, reg_address, swizzle ); ADDU( PspReg_A0, PspReg_A0, gMemoryBaseReg ); CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_A0, offset ); return; } //Re use old base register if consegutive accesses from same base register //Corn if( n64_base == mPreviousLoadBase ) { CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_K0, offset ); return; } else { ADDU( PspReg_K0, reg_address, gMemoryBaseReg ); CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_K0, offset ); mPreviousLoadBase = n64_base; return; } } } if( swizzle != 0 ) { if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; offset = 0; } XORI( PspReg_A0, reg_address, swizzle ); reg_address = PspReg_A0; } // // If the fast load failed, we need to perform a 'slow' load going through a handler routine // We may already be performing this instruction in the second buffer, in which // case we don't bother with the transitions // if( CAssemblyWriterPSP::IsBufferA() ) { // // This is a very quick check for 0x80000000 < address < 0x80r00000 // If it's in range, we add the base pointer (already offset by 0x80000000) // and dereference (the branch likely ensures that we only dereference // in the situations where the condition holds) // SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // t1 = upper < address CJumpLocation branch( BEQ( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) ); // branch to jump to handler ADDU( PspReg_A1, reg_address, gMemoryBaseReg ); CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_A1, offset ); CCodeLabel continue_location( GetAssemblyBuffer()->GetLabel() ); // // Generate the handler code in the secondary buffer // CAssemblyWriterPSP::SetBufferB(); CCodeLabel handler_label( GetAssemblyBuffer()->GetLabel() ); if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; } #ifdef ENABLE_LWC1 if( load_op == OP_LWC1 ) { GenerateSlowLoad( current_pc, PspReg_V0, reg_address, p_read_memory ); // Copy return value to the FPU destination register MTC1( (EPspFloatReg)psp_dst, PspReg_V0 ); } else #endif { GenerateSlowLoad( current_pc, psp_dst, reg_address, p_read_memory ); } CJumpLocation ret_handler( J( continue_location, true ) ); CAssemblyWriterPSP::SetBufferA(); // // Keep track of the details we need to generate the jump to the distant handler function // mAddressCheckFixups.push_back( SAddressCheckFixup( branch, handler_label ) ); } else { // // This is a very quick check for 0x80000000 < address < 0x80r00000 // If it's in range, we add the base pointer (already offset by 0x80000000) // and dereference (the branch likely ensures that we only dereference // in the situations where the condition holds) // SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // t1 = upper < address ADDU( PspReg_A1, reg_address, gMemoryBaseReg ); CJumpLocation branch( BNEL( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) ); CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_A1, offset ); if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; } #ifdef ENABLE_LWC1 if( load_op == OP_LWC1 ) { GenerateSlowLoad( current_pc, PspReg_V0, reg_address, p_read_memory ); // Copy return value to the FPU destination register MTC1( (EPspFloatReg)psp_dst, PspReg_V0 ); } else #endif { GenerateSlowLoad( current_pc, psp_dst, reg_address, p_read_memory ); } PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); } } // inline void CCodeGeneratorPSP::GenerateAddressCheckFixups() { for( u32 i {0}; i < mAddressCheckFixups.size(); ++i ) { GenerateAddressCheckFixup( mAddressCheckFixups[ i ] ); } } // inline void CCodeGeneratorPSP::GenerateAddressCheckFixup( const SAddressCheckFixup & fixup ) { CAssemblyWriterPSP::SetBufferA(); // // Fixup the branch we emitted so that it jumps to the handler routine. // If possible, we try to branch directly. // If that fails, we try to emit a stub function which jumps to the handler. Hopefully that's a bit closer // If *that* fails, we have to revert to an unconditional jump to the handler routine. Expensive!! // /* if( PatchJumpLong( fixup.BranchToJump, fixup.HandlerAddress ) ) { printf( "Wow, managed to jump directly to our handler\n" ); } else { if( PatchJumpLong( fixup.BranchToJump, GetAssemblyBuffer()->GetLabel() ) ) { // // Emit a long jump to the handler function // J( fixup.HandlerAddress, true ); } else { printf( "Warning, jump is too far for address handler function\n" ); ReplaceBranchWithJump( fixup.BranchToJump, fixup.HandlerAddress ); } } */ // Optimized version of above code - Salvy // First condition which is the fastest, never happens // Second condition which still fast, always happens (we have an assert if otherwise) // Third condition (slowest) never happens and thus is ignored // ToDO: Optimized futher eliminating return value from PatchJumpLong // PatchJumpLong( fixup.BranchToJump, GetAssemblyBuffer()->GetLabel() ); // // Emit a long jump to the handler function // J( fixup.HandlerAddress, true ); } // inline bool CCodeGeneratorPSP::GenerateDirectStore( EPspReg psp_src, EN64Reg base, s16 offset, OpCodeValue store_op, u32 swizzle ) { if(mRegisterCache.IsKnownValue( base, 0 )) { u32 base_address( mRegisterCache.GetKnownValue( base, 0 )._u32 ); u32 address( (base_address + s32( offset )) ^ swizzle ); const MemFuncWrite & m( g_MemoryLookupTableWrite[ address >> 18 ] ); if( m.pWrite ) { void * p_memory( (void*)( m.pWrite + address ) ); //printf( "Storing to %s %08x + %04x (%08x) op %d\n", RegNames[ base ], base_address, u16(offset), address, store_op ); EPspReg reg_base; s16 base_offset; GetBaseRegisterAndOffset( p_memory, ®_base, &base_offset ); CAssemblyWriterPSP::StoreRegister( psp_src, store_op, reg_base, base_offset ); return true; } else { //printf( "Storing to %s %08x + %04x (%08x) - unhandled\n", RegNames[ base ], base_address, u16(offset), address ); } } return false; } // inline void CCodeGeneratorPSP::GenerateSlowStore( u32 current_pc, EPspReg psp_src, EPspReg reg_address, WriteMemoryFunction p_write_memory ) { // We need to flush all the regular registers AND invalidate all the temp regs. // NB: When we can flush registers through some kind of handle (i.e. when the // exception handling branch is taken), this won't be necessary CN64RegisterCachePSP current_regs( mRegisterCache ); FlushAllRegisters( mRegisterCache, false ); FlushAllTemporaryRegisters( mRegisterCache, true ); if( reg_address != PspReg_A0 ) { OR( PspReg_A0, reg_address, PspReg_R0 ); } if( psp_src != PspReg_A1 ) { OR( PspReg_A1, psp_src, PspReg_R0 ); } PspOpCode op1, op2; GetLoadConstantOps( PspReg_A2, current_pc, &op1, &op2 ); if( op2._u32 == 0 ) { JAL( CCodeLabel( reinterpret_cast< const void * >( p_write_memory ) ), false ); // Don't set branch delay slot AppendOp( op1 ); } else { AppendOp( op1 ); JAL( CCodeLabel( reinterpret_cast< const void * >( p_write_memory ) ), false ); // Don't set branch delay slot AppendOp( op2 ); } // Restore all registers BEFORE copying back final value RestoreAllRegisters( mRegisterCache, current_regs ); // Restore the state of the registers mRegisterCache = current_regs; } // void CCodeGeneratorPSP::GenerateStore( u32 current_pc, EPspReg psp_src, EN64Reg n64_base, s32 offset, OpCodeValue store_op, u32 swizzle, WriteMemoryFunction p_write_memory ) { // // Check if the base pointer is a known value, in which case we can store directly. // if(GenerateDirectStore( psp_src, n64_base, offset, store_op, swizzle )) { return; } EPspReg reg_base( GetRegisterAndLoadLo( n64_base, PspReg_A0 ) ); EPspReg reg_address( reg_base ); if( (n64_base == N64Reg_SP) | (gMemoryAccessOptimisation & mQuickLoad) ) { if( swizzle != 0 ) { if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; offset = 0; } XORI( PspReg_A0, reg_address, swizzle ); ADDU( PspReg_A0, PspReg_A0, gMemoryBaseReg ); CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_A0, offset ); return; } //Re use old base register if consegutive accesses from same base register //Corn if( n64_base == mPreviousStoreBase ) { CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_K1, offset ); return; } else { ADDU( PspReg_K1, reg_address, gMemoryBaseReg ); CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_K1, offset ); mPreviousStoreBase = n64_base; return; } } if( swizzle != 0 ) { if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; offset = 0; } XORI( PspReg_A0, reg_address, swizzle ); reg_address = PspReg_A0; } if( CAssemblyWriterPSP::IsBufferA() ) { SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // V1 = upper < address CJumpLocation branch( BEQ( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) ); ADDU( PspReg_V0, reg_address, gMemoryBaseReg ); CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_V0, offset ); CCodeLabel continue_location( GetAssemblyBuffer()->GetLabel() ); // // Generate the handler code in the secondary buffer // CAssemblyWriterPSP::SetBufferB(); CCodeLabel handler_label( GetAssemblyBuffer()->GetLabel() ); if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; } #ifdef ENABLE_SWC1 if( store_op == OP_SWC1 ) { //Move value from FPU to CPU MFC1( PspReg_A1, (EPspFloatReg)psp_src ); psp_src = PspReg_A1; } #endif GenerateSlowStore( current_pc, psp_src, reg_address, p_write_memory ); CJumpLocation ret_handler( J( continue_location, true ) ); CAssemblyWriterPSP::SetBufferA(); // // Keep track of the details we need to generate the jump to the distant handler function // mAddressCheckFixups.push_back( SAddressCheckFixup( branch, handler_label ) ); } else { SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // V1 = upper < address ADDU( PspReg_V0, reg_address, gMemoryBaseReg ); CJumpLocation branch( BNEL( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) ); CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_V0, offset ); if( offset != 0 ) { ADDIU( PspReg_A0, reg_address, offset ); reg_address = PspReg_A0; } #ifdef ENABLE_SWC1 if( store_op == OP_SWC1 ) { //Move value from FPU to CPU MFC1( PspReg_A1, (EPspFloatReg)psp_src ); psp_src = PspReg_A1; } #endif GenerateSlowStore( current_pc, psp_src, reg_address, p_write_memory ); PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); } } // inline void CCodeGeneratorPSP::GenerateCACHE( EN64Reg base, s16 offset, u32 cache_op ) { //u32 cache_op = op_code.rt; //u32 address = (u32)( gGPR[op_code.base]._s32_0 + (s32)(s16)op_code.immediate ); u32 cache = cache_op & 0x3; u32 action = (cache_op >> 2) & 0x7; // For instruction cache invalidation, make sure we let the CPU know so the whole // dynarec system can be invalidated //if(!(cache_op & 0x1F)) if(cache == 0 && (action == 0 || action == 4)) { FlushAllRegisters(mRegisterCache, true); LoadRegister(PspReg_A0, base, 0); ADDI(PspReg_A0, PspReg_A0, offset); JAL( CCodeLabel( reinterpret_cast< const void * >( CPU_InvalidateICacheRange ) ), false ); ORI(PspReg_A1, PspReg_R0, 0x20); } //else //{ // We don't care about data cache etc //} } // inline void CCodeGeneratorPSP::GenerateJAL( u32 address ) { //gGPR[REG_ra]._s64 = (s64)(s32)(gCPUState.CurrentPC + 8); // Store return address //u32 new_pc( (gCPUState.CurrentPC & 0xF0000000) | (op_code.target<<2) ); //CPU_TakeBranch( new_pc, CPU_BRANCH_DIRECT ); //EPspReg reg_lo( GetRegisterNoLoadLo( N64Reg_RA, PspReg_V0 ) ); //LoadConstant( reg_lo, address + 8 ); //UpdateRegister( N64Reg_RA, reg_lo, URO_HI_SIGN_EXTEND, PspReg_V0 ); SetRegister32s(N64Reg_RA, address + 8); } // inline void CCodeGeneratorPSP::GenerateJR( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { //u32 new_pc( gGPR[ op_code.rs ]._u32_0 ); //CPU_TakeBranch( new_pc, CPU_BRANCH_INDIRECT ); EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); // Necessary? Could just directly compare reg_lo and constant p_branch->TargetAddress?? //SetVar( &gCPUState.TargetPC, reg_lo ); //SetVar( &gCPUState.Delay, DO_DELAY ); //*p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.TargetPC, p_branch->TargetAddress, CCodeLabel() ); SetVar( &gCPUState.TargetPC, reg_lo ); //SetVar( &gCPUState.Delay, DO_DELAY ); *p_branch_jump = GenerateBranchIfNotEqual( reg_lo, p_branch->TargetAddress, CCodeLabel() ); } // inline void CCodeGeneratorPSP::GenerateJALR( EN64Reg rs, EN64Reg rd, u32 address, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { // Jump And Link //u32 new_pc( gGPR[ op_code.rs ]._u32_0 ); //gGPR[ op_code.rd ]._s64 = (s64)(s32)(gCPUState.CurrentPC + 8); // Store return address //EPspReg savereg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); // Necessary? Could just directly compare reg_lo and constant p_branch->TargetAddress?? //SetVar( &gCPUState.TargetPC, reg_lo ); //SetVar( &gCPUState.Delay, DO_DELAY ); //*p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.TargetPC, p_branch->TargetAddress, CCodeLabel() ); //LoadConstant( savereg_lo, address + 8 ); //UpdateRegister( rd, savereg_lo, URO_HI_SIGN_EXTEND, PspReg_V0 ); SetRegister32s(rd, address + 8); EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); SetVar( &gCPUState.TargetPC, reg_lo ); *p_branch_jump = GenerateBranchIfNotEqual( reg_lo, p_branch->TargetAddress, CCodeLabel() ); } // inline void CCodeGeneratorPSP::GenerateMFLO( EN64Reg rd ) { //gGPR[ op_code.rd ]._u64 = gCPUState.MultLo._u64; if( mMultIsValid ) { EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_A0 ) ); MFLO( reg_lo ); UpdateRegister( rd, reg_lo, URO_HI_SIGN_EXTEND ); } else { EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); GetVar( reg_lo, &gCPUState.MultLo._u32_0 ); StoreRegisterLo( rd, reg_lo ); EPspReg reg_hi( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); GetVar( reg_hi, &gCPUState.MultLo._u32_1 ); StoreRegisterHi( rd, reg_hi ); } } // inline void CCodeGeneratorPSP::GenerateMFHI( EN64Reg rd ) { //gGPR[ op_code.rd ]._u64 = gCPUState.MultHi._u64; if( mMultIsValid ) { EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); MFHI( reg_lo ); UpdateRegister( rd, reg_lo, URO_HI_SIGN_EXTEND ); } else { EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); GetVar( reg_lo, &gCPUState.MultHi._u32_0 ); StoreRegisterLo( rd, reg_lo ); EPspReg reg_hi( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); GetVar( reg_hi, &gCPUState.MultHi._u32_1 ); StoreRegisterHi( rd, reg_hi ); } } // inline void CCodeGeneratorPSP::GenerateMTLO( EN64Reg rs ) { mMultIsValid = false; //gCPUState.MultLo._u64 = gGPR[ op_code.rs ]._u64; EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); SetVar( &gCPUState.MultLo._u32_0, reg_lo ); EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); SetVar( &gCPUState.MultLo._u32_1, reg_hi ); } // inline void CCodeGeneratorPSP::GenerateMTHI( EN64Reg rs ) { mMultIsValid = false; //gCPUState.MultHi._u64 = gGPR[ op_code.rs ]._u64; EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); SetVar( &gCPUState.MultHi._u32_0, reg_lo ); EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); SetVar( &gCPUState.MultHi._u32_1, reg_hi ); } // inline void CCodeGeneratorPSP::GenerateMULT( EN64Reg rs, EN64Reg rt ) { mMultIsValid = true; //s64 dwResult = (s64)gGPR[ op_code.rs ]._s32_0 * (s64)gGPR[ op_code.rt ]._s32_0; //gCPUState.MultLo = (s64)(s32)(dwResult); //gCPUState.MultHi = (s64)(s32)(dwResult >> 32); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); MULT( reg_lo_a, reg_lo_b ); MFLO( PspReg_V0 ); MFHI( PspReg_A0 ); SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 ); #ifdef ENABLE_64BIT SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 ); #endif } // inline void CCodeGeneratorPSP::GenerateMULTU( EN64Reg rs, EN64Reg rt ) { mMultIsValid = true; //u64 dwResult = (u64)gGPR[ op_code.rs ]._u32_0 * (u64)gGPR[ op_code.rt ]._u32_0; //gCPUState.MultLo = (s64)(s32)(dwResult); //gCPUState.MultHi = (s64)(s32)(dwResult >> 32); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); MULTU( reg_lo_a, reg_lo_b ); MFLO( PspReg_V0 ); MFHI( PspReg_A0 ); SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 ); //Yoshi and DOOM64 must have sign extension //Corn SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 ); } // inline void CCodeGeneratorPSP::GenerateDIV( EN64Reg rs, EN64Reg rt ) { mMultIsValid = true; //s32 nDividend = gGPR[ op_code.rs ]._s32_0; //s32 nDivisor = gGPR[ op_code.rt ]._s32_0; //if (nDivisor) //{ // gCPUState.MultLo._u64 = (s64)(s32)(nDividend / nDivisor); // gCPUState.MultHi._u64 = (s64)(s32)(nDividend % nDivisor); //} #ifdef DIVZEROCHK EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); CJumpLocation branch( BEQ( reg_lo_rt, PspReg_R0, CCodeLabel(nullptr), true ) ); // Can use branch delay for something? DIV( reg_lo_rs, reg_lo_rt ); MFLO( PspReg_V0 ); MFHI( PspReg_A0 ); SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 ); SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 ); // Branch here - really should trigger exception! PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); #else EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); DIV( reg_lo_rs, reg_lo_rt ); MFLO( PspReg_V0 ); MFHI( PspReg_A0 ); SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 ); #ifdef ENABLE_64BIT SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 ); #endif #endif } // inline void CCodeGeneratorPSP::GenerateDIVU( EN64Reg rs, EN64Reg rt ) { mMultIsValid = true; //u32 dwDividend = gGPR[ op_code.rs ]._u32_0; //u32 dwDivisor = gGPR[ op_code.rt ]._u32_0; //if (dwDivisor) { // gCPUState.MultLo._u64 = (s64)(s32)(dwDividend / dwDivisor); // gCPUState.MultHi._u64 = (s64)(s32)(dwDividend % dwDivisor); //} #ifdef DIVZEROCHK EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); CJumpLocation branch( BEQ( reg_lo_rt, PspReg_R0, CCodeLabel(nullptr), true ) ); // Can use branch delay for something? DIVU( reg_lo_rs, reg_lo_rt ); MFLO( PspReg_V0 ); MFHI( PspReg_A0 ); SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 ); SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 ); // Branch here - really should trigger exception! PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); #else EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); DIVU( reg_lo_rs, reg_lo_rt ); MFLO( PspReg_V0 ); MFHI( PspReg_A0 ); SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 ); #ifdef ENABLE_64BIT SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 ); SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 ); #endif #endif } // inline void CCodeGeneratorPSP::GenerateADDU( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, mRegisterCache.GetKnownValue(rs, 0)._s32 + mRegisterCache.GetKnownValue(rt, 0)._s32); return; } if( rs == N64Reg_R0 ) { if(mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, mRegisterCache.GetKnownValue(rt, 0)._s32); return; } // As RS is zero, the ADD is just a copy of RT to RD. // Try to avoid loading into a temp register if the dest is cached EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); LoadRegisterLo( reg_lo_d, rt ); UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND ); } else if( rt == N64Reg_R0 ) { if(mRegisterCache.IsKnownValue(rs, 0)) { SetRegister32s(rd, mRegisterCache.GetKnownValue(rs, 0)._s32); return; } // As RT is zero, the ADD is just a copy of RS to RD. // Try to avoid loading into a temp register if the dest is cached EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); LoadRegisterLo( reg_lo_d, rs ); UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND ); } else { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rs ]._s32_0 + gGPR[ op_code.rt ]._s32_0 ); EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); ADDU( reg_lo_d, reg_lo_a, reg_lo_b ); UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND ); } } // inline void CCodeGeneratorPSP::GenerateSUBU( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, mRegisterCache.GetKnownValue(rs, 0)._s32 - mRegisterCache.GetKnownValue(rt, 0)._s32); return; } //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rs ]._s32_0 - gGPR[ op_code.rt ]._s32_0 ); EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SUBU( reg_lo_d, reg_lo_a, reg_lo_b ); UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND ); } //64bit unsigned division //Corn inline void CCodeGeneratorPSP::GenerateDDIVU( EN64Reg rs, EN64Reg rt ) { mMultIsValid = false; EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) ); if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a); if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a); if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b); JAL( CCodeLabel( (void*)_DDIVU ), true ); } //64bit signed division //Corn inline void CCodeGeneratorPSP::GenerateDDIV( EN64Reg rs, EN64Reg rt ) { mMultIsValid = false; EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) ); if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a); if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a); if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b); JAL( CCodeLabel( (void*)_DDIV ), true ); } //64bit unsigned multiply //Corn inline void CCodeGeneratorPSP::GenerateDMULTU( EN64Reg rs, EN64Reg rt ) { mMultIsValid = false; EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A3 ) ); if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a); if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a); if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b); if( reg_hi_b != PspReg_A3 ) OR( PspReg_A3, PspReg_R0, reg_hi_b); JAL( CCodeLabel( (void*)_DMULTU ), true ); } //64bit signed multiply //Corn inline void CCodeGeneratorPSP::GenerateDMULT( EN64Reg rs, EN64Reg rt ) { mMultIsValid = false; EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A3 ) ); if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a); if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a); if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b); if( reg_hi_b != PspReg_A3 ) OR( PspReg_A3, PspReg_R0, reg_hi_b); JAL( CCodeLabel( (void*)_DMULT ), true ); } // inline void CCodeGeneratorPSP::GenerateDADDIU( EN64Reg rt, EN64Reg rs, s16 immediate ) { if( rs == N64Reg_R0 ) { SetRegister32s( rt, immediate ); } else { EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); if(reg_lo_d == reg_lo_a) { ADDIU( PspReg_A0, reg_lo_a, immediate ); SLTU( PspReg_A1, PspReg_A0, reg_lo_a ); // Overflowed? OR( reg_lo_d, PspReg_A0, PspReg_R0 ); } else { ADDIU( reg_lo_d, reg_lo_a, immediate ); SLTU( PspReg_A1, reg_lo_d, reg_lo_a ); // Overflowed? } StoreRegisterLo( rt, reg_lo_d ); EPspReg reg_hi_d( GetRegisterNoLoadHi( rt, PspReg_V0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A0 ) ); ADDU( reg_hi_d, PspReg_A1, reg_hi_a ); // Add on overflow StoreRegisterHi( rt, reg_hi_d ); } } // inline void CCodeGeneratorPSP::GenerateDADDU( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rt ]._u64 + gGPR[ op_code.rs ]._u64 //890c250: 00c41021 addu d_lo,a_lo,b_lo //890c254: 0046402b sltu t0,d_lo,a_lo //890c260: ad220000 sw d_lo,0(t1) //890c258: 00e51821 addu d_hi,a_hi,b_hi //890c25c: 01031821 addu d_hi,t0,d_hi //890c268: ad230004 sw d_hi,4(t1) EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); ADDU( reg_lo_d, reg_lo_a, reg_lo_b ); //Assumes that just one of the source regs are the same as DST SLTU( PspReg_V1, reg_lo_d, reg_lo_d == reg_lo_a ? reg_lo_b : reg_lo_a ); // Overflowed? StoreRegisterLo( rd, reg_lo_d ); EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A1 ) ); ADDU( reg_hi_d, reg_hi_a, reg_hi_b ); ADDU( reg_hi_d, PspReg_V1, reg_hi_d ); // Add on overflow StoreRegisterHi( rd, reg_hi_d ); } // inline void CCodeGeneratorPSP::GenerateAND( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rs ]._u64 & gGPR[ op_code.rt ]._u64; bool HiIsDone = false; if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1)) { SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 & mRegisterCache.GetKnownValue(rt, 1)._u32 ); HiIsDone = true; } else if ((mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._u32 == 0)) | (mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._u32 == 0)) ) { SetRegister(rd, 1, 0 ); HiIsDone = true; } else if( mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._s32 == -1) ) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rs ); StoreRegisterHi( rd, reg_hi_d ); HiIsDone = true; } if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister(rd, 0, mRegisterCache.GetKnownValue(rs, 0)._u32 & mRegisterCache.GetKnownValue(rt, 0)._u32 ); } else if ((rs == N64Reg_R0) | (rt == N64Reg_R0)) { SetRegister64( rd, 0, 0 ); } else { // XXXX or into dest register EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); AND( reg_lo_d, reg_lo_a, reg_lo_b ); StoreRegisterLo( rd, reg_lo_d ); if(!HiIsDone) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); AND( reg_hi_d, reg_hi_a, reg_hi_b ); StoreRegisterHi( rd, reg_hi_d ); } } } // void CCodeGeneratorPSP::GenerateOR( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rs ]._u64 | gGPR[ op_code.rt ]._u64; bool HiIsDone = false; if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1)) { SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 | mRegisterCache.GetKnownValue(rt, 1)._u32 ); HiIsDone = true; } else if ((mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._s32 == -1)) | (mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._s32 == -1)) ) { SetRegister(rd, 1, ~0 ); HiIsDone = true; } if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister(rd, 0, mRegisterCache.GetKnownValue(rs, 0)._u32 | mRegisterCache.GetKnownValue(rt, 0)._u32); return; } if( rs == N64Reg_R0 ) { // This doesn't seem to happen /*if (mRegisterCache.IsKnownValue(rt, 1)) { SetRegister(rd, 1, mRegisterCache.GetKnownValue(rt, 1)._u32 ); HiIsDone = true; } */ if(mRegisterCache.IsKnownValue(rt, 0)) { SetRegister64(rd, mRegisterCache.GetKnownValue(rt, 0)._u32, mRegisterCache.GetKnownValue(rt, 1)._u32); return; } // This case rarely seems to happen... // As RS is zero, the OR is just a copy of RT to RD. // Try to avoid loading into a temp register if the dest is cached EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); LoadRegisterLo( reg_lo_d, rt ); StoreRegisterLo( rd, reg_lo_d ); if(!HiIsDone) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rt ); StoreRegisterHi( rd, reg_hi_d ); } } else if( rt == N64Reg_R0 ) { if (mRegisterCache.IsKnownValue(rs, 1)) { SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 ); HiIsDone = true; } if(mRegisterCache.IsKnownValue(rs, 0)) { SetRegister64(rd, mRegisterCache.GetKnownValue(rs, 0)._u32, mRegisterCache.GetKnownValue(rs, 1)._u32); return; } // As RT is zero, the OR is just a copy of RS to RD. // Try to avoid loading into a temp register if the dest is cached EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); LoadRegisterLo( reg_lo_d, rs ); StoreRegisterLo( rd, reg_lo_d ); if(!HiIsDone) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rs ); StoreRegisterHi( rd, reg_hi_d ); } } else { EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); OR( reg_lo_d, reg_lo_a, reg_lo_b ); StoreRegisterLo( rd, reg_lo_d ); if(!HiIsDone) { if( mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._u32 == 0) ) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rt ); StoreRegisterHi( rd, reg_hi_d ); } else if( mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._u32 == 0) ) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rs ); StoreRegisterHi( rd, reg_hi_d ); } else { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); OR( reg_hi_d, reg_hi_a, reg_hi_b ); StoreRegisterHi( rd, reg_hi_d ); } } } } // inline void CCodeGeneratorPSP::GenerateXOR( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rs ]._u64 ^ gGPR[ op_code.rt ]._u64; bool HiIsDone = false; if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1)) { SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 ^ mRegisterCache.GetKnownValue(rt, 1)._u32 ); HiIsDone = true; } else if ((mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._u32 == 0)) ) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rt ); StoreRegisterHi( rd, reg_hi_d ); HiIsDone = true; } else if ((mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._u32 == 0)) ) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); LoadRegisterHi( reg_hi_d, rs ); StoreRegisterHi( rd, reg_hi_d ); HiIsDone = true; } if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister(rd, 0, mRegisterCache.GetKnownValue(rs, 0)._u32 ^ mRegisterCache.GetKnownValue(rt, 0)._u32); return; } EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); XOR( reg_lo_d, reg_lo_a, reg_lo_b ); StoreRegisterLo( rd, reg_lo_d ); if(!HiIsDone) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); XOR( reg_hi_d, reg_hi_a, reg_hi_b ); StoreRegisterHi( rd, reg_hi_d ); } } // inline void CCodeGeneratorPSP::GenerateNOR( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._u64 = ~(gGPR[ op_code.rs ]._u64 | gGPR[ op_code.rt ]._u64); bool HiIsDone = false; if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1)) { SetRegister(rd, 1, ~(mRegisterCache.GetKnownValue(rs, 1)._u32 | mRegisterCache.GetKnownValue(rt, 1)._u32) ); HiIsDone = true; } if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister(rd, 0, ~(mRegisterCache.GetKnownValue(rs, 0)._u32 | mRegisterCache.GetKnownValue(rt, 0)._u32) ); return; } EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); NOR( reg_lo_d, reg_lo_a, reg_lo_b ); StoreRegisterLo( rd, reg_lo_d ); if(!HiIsDone) { EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); NOR( reg_hi_d, reg_hi_a, reg_hi_b ); StoreRegisterHi( rd, reg_hi_d ); } } // inline void CCodeGeneratorPSP::GenerateSLT( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { #ifdef ENABLE_64BIT // Because we have a branch here, we need to make sure that we have a consistent view // of the registers regardless of whether we take it or not. We pull in the lo halves of // the registers here so that they're Valid regardless of whether we take the branch or not PrepareCachedRegisterLo( rs ); PrepareCachedRegisterLo( rt ); // If possible, we write directly into the destination register. We have to be careful though - // if the destination register is the same as either of the source registers we have to use // a temporary instead, to avoid overwriting the contents. EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); if((rd == rs) | (rd == rt)) { reg_lo_d = PspReg_V0; } EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); CJumpLocation branch( BNE( reg_hi_a, reg_hi_b, CCodeLabel(nullptr), false ) ); SLT( reg_lo_d, reg_hi_a, reg_hi_b ); // In branch delay slot // If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SLT( reg_lo_d, reg_lo_a, reg_lo_b ); // Patch up the branch PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); #else /* if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (mRegisterCache.GetKnownValue(rs, 0)._s32 < mRegisterCache.GetKnownValue(rt, 0)._s32) ); return; } */ EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SLT( reg_lo_d, reg_lo_a, reg_lo_b ); #endif UpdateRegister( rd, reg_lo_d, URO_HI_CLEAR ); } // inline void CCodeGeneratorPSP::GenerateSLTU( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { #ifdef ENABLE_64BIT // Because we have a branch here, we need to make sure that we have a consistant view // of the registers regardless of whether we take it or not. We pull in the lo halves of // the registers here so that they're Valid regardless of whether we take the branch or not PrepareCachedRegisterLo( rs ); PrepareCachedRegisterLo( rt ); // If possible, we write directly into the destination register. We have to be careful though - // if the destination register is the same as either of the source registers we have to use // a temporary instead, to avoid overwriting the contents. EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); if((rd == rs) | (rd == rt)) { reg_lo_d = PspReg_V0; } EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); CJumpLocation branch( BNE( reg_hi_a, reg_hi_b, CCodeLabel(nullptr), false ) ); SLTU( reg_lo_d, reg_hi_a, reg_hi_b ); // In branch delay slot // If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SLTU( reg_lo_d, reg_lo_a, reg_lo_b ); // Patch up the branch PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); #else /* if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (mRegisterCache.GetKnownValue(rs, 0)._u32 < mRegisterCache.GetKnownValue(rt, 0)._u32) ); return; } */ EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SLTU( reg_lo_d, reg_lo_a, reg_lo_b ); #endif UpdateRegister( rd, reg_lo_d, URO_HI_CLEAR ); } // inline void CCodeGeneratorPSP::GenerateADDIU( EN64Reg rt, EN64Reg rs, s16 immediate ) { //gGPR[op_code.rt]._s64 = (s64)(s32)(gGPR[op_code.rs]._s32_0 + (s32)(s16)op_code.immediate); if( rs == N64Reg_R0 ) { SetRegister32s( rt, immediate ); } else if(mRegisterCache.IsKnownValue( rs, 0 )) { s32 known_value( mRegisterCache.GetKnownValue( rs, 0 )._s32 + (s32)immediate ); SetRegister32s( rt, known_value ); } else { EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); ADDIU( dst_reg, src_reg, immediate ); UpdateRegister( rt, dst_reg, URO_HI_SIGN_EXTEND ); } } // inline void CCodeGeneratorPSP::GenerateANDI( EN64Reg rt, EN64Reg rs, u16 immediate ) { //gGPR[op_code.rt]._u64 = gGPR[op_code.rs]._u64 & (u64)(u16)op_code.immediate; if(mRegisterCache.IsKnownValue( rs, 0 )) { s32 known_value_lo( mRegisterCache.GetKnownValue( rs, 0 )._u32 & (u32)immediate ); s32 known_value_hi( 0 ); SetRegister64( rt, known_value_lo, known_value_hi ); } else { EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); ANDI( dst_reg, src_reg, immediate ); UpdateRegister( rt, dst_reg, URO_HI_CLEAR ); } } // inline void CCodeGeneratorPSP::GenerateORI( EN64Reg rt, EN64Reg rs, u16 immediate ) { //gGPR[op_code.rt]._u64 = gGPR[op_code.rs]._u64 | (u64)(u16)op_code.immediate; if( rs == N64Reg_R0 ) { // If we're oring again 0, then we're just setting a constant value SetRegister64( rt, immediate, 0 ); } else if(mRegisterCache.IsKnownValue( rs, 0 )) { s32 known_value_lo( mRegisterCache.GetKnownValue( rs, 0 )._u32 | (u32)immediate ); s32 known_value_hi( mRegisterCache.GetKnownValue( rs, 1 )._u32 ); SetRegister64( rt, known_value_lo, known_value_hi ); } else { EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); ORI( dst_reg, src_reg, immediate ); StoreRegisterLo( rt, dst_reg ); #ifdef ENABLE_64BIT //This is out of spec but seems we can always ignore copying the hi part // If the source/dest regs are different we need to copy the high bits across if(rt != rs) { EPspReg dst_reg_hi( GetRegisterNoLoadHi( rt, PspReg_V0 ) ); LoadRegisterHi( dst_reg_hi, rs ); StoreRegisterHi( rt, dst_reg_hi ); } #endif } } // inline void CCodeGeneratorPSP::GenerateXORI( EN64Reg rt, EN64Reg rs, u16 immediate ) { //gGPR[op_code.rt]._u64 = gGPR[op_code.rs]._u64 ^ (u64)(u16)op_code.immediate; if(mRegisterCache.IsKnownValue( rs, 0 )) { s32 known_value_lo( mRegisterCache.GetKnownValue( rs, 0 )._u32 ^ (u32)immediate ); s32 known_value_hi( mRegisterCache.GetKnownValue( rs, 1 )._u32 ); SetRegister64( rt, known_value_lo, known_value_hi ); } else { EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); XORI( dst_reg, src_reg, immediate ); StoreRegisterLo( rt, dst_reg ); #ifdef ENABLE_64BIT //This is out of spec but seems we can always ignore copying the hi part // If the source/dest regs are different we need to copy the high bits across // (if they are the same, we're xoring 0 to the top half which is essentially a NOP) if(rt != rs) { EPspReg dst_reg_hi( GetRegisterNoLoadHi( rt, PspReg_V0 ) ); LoadRegisterHi( dst_reg_hi, rs ); StoreRegisterHi( rt, dst_reg_hi ); } #endif } } // inline void CCodeGeneratorPSP::GenerateLUI( EN64Reg rt, s16 immediate ) { //gGPR[op_code.rt]._s64 = (s64)(s32)((s32)(s16)op_code.immediate<<16); SetRegister32s( rt, s32( immediate ) << 16 ); } // inline void CCodeGeneratorPSP::GenerateSLTI( EN64Reg rt, EN64Reg rs, s16 immediate ) { #ifdef ENABLE_64BIT // Because we have a branch here, we need to make sure that we have a consistant view // of the register regardless of whether we take it or not. We pull in the lo halves of // the register here so that it's Valid regardless of whether we take the branch or not PrepareCachedRegisterLo( rs ); // If possible, we write directly into the destination register. We have to be careful though - // if the destination register is the same as either of the source registers we have to use // a temporary instead, to avoid overwriting the contents. EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); if(rt == rs) { reg_lo_d = PspReg_V0; } CJumpLocation branch; EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); if( immediate >= 0 ) { // Positive data - we can avoid a contant load here branch = BNE( reg_hi_a, PspReg_R0, CCodeLabel(nullptr), false ); SLTI( reg_lo_d, reg_hi_a, 0x0000 ); // In branch delay slot } else { // Negative data LoadConstant( PspReg_A0, -1 ); branch = BNE( reg_hi_a, PspReg_A0, CCodeLabel(nullptr), false ); SLTI( reg_lo_d, reg_hi_a, 0xffff ); // In branch delay slot } // If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); //Potential bug!!! //If using 64bit mode this might need to be SLTIU since sign is already checked in hi word //Strmn SLTI( reg_lo_d, reg_lo_a, immediate ); // Patch up the branch PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); #else /* if (mRegisterCache.IsKnownValue(rs, 0)) { SetRegister32s( rt, (mRegisterCache.GetKnownValue(rs, 0)._s32 < (s32)immediate) ); return; } */ EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); SLTI( reg_lo_d, reg_lo_a, immediate ); #endif UpdateRegister( rt, reg_lo_d, URO_HI_CLEAR ); } // inline void CCodeGeneratorPSP::GenerateSLTIU( EN64Reg rt, EN64Reg rs, s16 immediate ) { #ifdef ENABLE_64BIT // Because we have a branch here, we need to make sure that we have a consistent view // of the register regardless of whether we take it or not. We pull in the lo halves of // the register here so that it's Valid regardless of whether we take the branch or not PrepareCachedRegisterLo( rs ); // If possible, we write directly into the destination register. We have to be careful though - // if the destination register is the same as either of the source registers we have to use // a temporary instead, to avoid overwriting the contents. EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); if(rt == rs) { reg_lo_d = PspReg_V0; } CJumpLocation branch; EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) ); if( immediate >= 0 ) { // Positive data - we can avoid a contant load here branch = BNE( reg_hi_a, PspReg_R0, CCodeLabel(nullptr), false ); SLTIU( reg_lo_d, reg_hi_a, 0x0000 ); // In branch delay slot } else { // Negative data LoadConstant( PspReg_A0, -1 ); branch = BNE( reg_hi_a, PspReg_A0, CCodeLabel(nullptr), false ); SLTIU( reg_lo_d, reg_hi_a, 0xffff ); // In branch delay slot } // If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); SLTIU( reg_lo_d, reg_lo_a, immediate ); // Patch up the branch PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() ); #else /* if (mRegisterCache.IsKnownValue(rs, 0)) { SetRegister32s( rt, (mRegisterCache.GetKnownValue(rs, 0)._u32 < (u32)immediate) ); return; } */ EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); SLTIU( reg_lo_d, reg_lo_a, immediate ); #endif UpdateRegister( rt, reg_lo_d, URO_HI_CLEAR ); } // inline void CCodeGeneratorPSP::GenerateSLL( EN64Reg rd, EN64Reg rt, u32 sa ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( (gGPR[ op_code.rt ]._u32_0 << op_code.sa) & 0xFFFFFFFF ); if (mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 << sa)); return; } EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SLL( reg_lo_rd, reg_lo_rt, sa ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } //Double Shift Left Logical (Doom64) inline void CCodeGeneratorPSP::GenerateDSLL( EN64Reg rd, EN64Reg rt, u32 sa ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( (gGPR[ op_code.rt ]._u32_0 << op_code.sa) & 0xFFFFFFFF ); //if (mRegisterCache.IsKnownValue(rt, 0)) //{ // SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 << sa)); // return; //} EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_hi_rd( GetRegisterNoLoadHi( rd, PspReg_A0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); EPspReg reg_hi_rt( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); SLL( reg_hi_rd, reg_hi_rt, sa ); if( sa != 0 ) { SRL( PspReg_A2, reg_lo_rt, 32-sa); OR( reg_hi_rd, reg_hi_rd, PspReg_A2); } SLL( reg_lo_rd, reg_lo_rt, sa ); StoreRegisterLo( rd, reg_lo_rd); StoreRegisterHi( rd, reg_hi_rd); } // inline void CCodeGeneratorPSP::GenerateDSLL32( EN64Reg rd, EN64Reg rt, u32 sa ) { EPspReg reg_hi_rd( GetRegisterNoLoadHi( rd, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SLL( reg_hi_rd, reg_lo_rt, sa ); SetRegister( rd, 0, 0 ); //Zero lo part StoreRegisterHi( rd, reg_hi_rd ); //Store result } // inline void CCodeGeneratorPSP::GenerateSRL( EN64Reg rd, EN64Reg rt, u32 sa ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._u32_0 >> op_code.sa ); if (mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 >> sa)); return; } EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SRL( reg_lo_rd, reg_lo_rt, sa ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } //Double Shift Right Arithmetic (Doom64) inline void CCodeGeneratorPSP::GenerateDSRA( EN64Reg rd, EN64Reg rt, u32 sa ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._s32_0 >> op_code.sa ); //if (mRegisterCache.IsKnownValue(rt, 0)) //{ // SetRegister32s(rd, mRegisterCache.GetKnownValue(rt, 0)._s32 >> sa); // return; //} EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_hi_rd( GetRegisterNoLoadHi( rd, PspReg_A0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); EPspReg reg_hi_rt( GetRegisterAndLoadHi( rt, PspReg_A0 ) ); SRL( reg_lo_rd, reg_lo_rt, sa ); if( sa != 0 ) { SLL( PspReg_A2, reg_hi_rt, 32-sa ); OR( reg_lo_rd, reg_lo_rd, PspReg_A2); } SRA( reg_hi_rd, reg_hi_rt, sa ); StoreRegisterLo( rd, reg_lo_rd); StoreRegisterHi( rd, reg_hi_rd); } // inline void CCodeGeneratorPSP::GenerateDSRA32( EN64Reg rd, EN64Reg rt, u32 sa ) { EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_hi_rt( GetRegisterAndLoadHi( rt, PspReg_V0 ) ); SRA( reg_lo_rd, reg_hi_rt, sa ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateSRA( EN64Reg rd, EN64Reg rt, u32 sa ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._s32_0 >> op_code.sa ); if (mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, mRegisterCache.GetKnownValue(rt, 0)._s32 >> sa); return; } EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SRA( reg_lo_rd, reg_lo_rt, sa ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateSLLV( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( (gGPR[ op_code.rt ]._u32_0 << ( gGPR[ op_code.rs ]._u32_0 & 0x1F ) ) & 0xFFFFFFFF ); if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 << (mRegisterCache.GetKnownValue(rs, 0)._u32 & 0x1F))); return; } // PSP sllv does exactly the same op as n64- no need for masking EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SLLV( reg_lo_rd, reg_lo_rs, reg_lo_rt ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateSRLV( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._u32_0 >> ( gGPR[ op_code.rs ]._u32_0 & 0x1F ) ); if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 >> (mRegisterCache.GetKnownValue(rs, 0)._u32 & 0x1F))); return; } // PSP srlv does exactly the same op as n64- no need for masking EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SRLV( reg_lo_rd, reg_lo_rs, reg_lo_rt ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateSRAV( EN64Reg rd, EN64Reg rs, EN64Reg rt ) { //gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._s32_0 >> ( gGPR[ op_code.rs ]._u32_0 & 0x1F ) ); if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0)) { SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._s32 >> (mRegisterCache.GetKnownValue(rs, 0)._u32 & 0x1F))); return; } // PSP srlv does exactly the same op as n64- no need for masking EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) ); EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_A0 ) ); EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SRAV( reg_lo_rd, reg_lo_rs, reg_lo_rt ); UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateLB( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached GenerateLoad( address, reg_dst, base, offset, OP_LB, 3, set_branch_delay ? ReadBitsDirectBD_s8 : ReadBitsDirect_s8 ); // NB this fills the whole of reg_dst UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateLBU( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached GenerateLoad( address, reg_dst, base, offset, OP_LBU, 3, set_branch_delay ? ReadBitsDirectBD_u8 : ReadBitsDirect_u8 ); // NB this fills the whole of reg_dst UpdateRegister( rt, reg_dst, URO_HI_CLEAR ); } // inline void CCodeGeneratorPSP::GenerateLH( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached GenerateLoad( address, reg_dst, base, offset, OP_LH, 2, set_branch_delay ? ReadBitsDirect_s16 : ReadBitsDirect_s16 ); // NB this fills the whole of reg_dst UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateLHU( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached GenerateLoad( address, reg_dst, base, offset, OP_LHU, 2, set_branch_delay ? ReadBitsDirectBD_u16 : ReadBitsDirect_u16 ); // NB this fills the whole of reg_dst UpdateRegister( rt, reg_dst, URO_HI_CLEAR ); } // inline void CCodeGeneratorPSP::GenerateLW( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset ) { EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached // This is for San Francisco 2049, otherwise it crashes when the race is about to start. if(rt == N64Reg_R0) { #ifdef DAEDALUS_DEBUG_CONSOLE DAEDALUS_ERROR("Attempted write to r0!"); #endif return; } GenerateLoad( address, reg_dst, base, offset, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateLD( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset ) { EPspReg reg_dst_hi( GetRegisterNoLoadHi( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached GenerateLoad( address, reg_dst_hi, base, offset, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); StoreRegisterHi( rt, reg_dst_hi ); //Adding 4 to offset should be quite safe //Corn EPspReg reg_dst_lo( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached GenerateLoad( address, reg_dst_lo, base, offset + 4, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); StoreRegisterLo( rt, reg_dst_lo ); } // inline void CCodeGeneratorPSP::GenerateLWC1( u32 address, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset ) { EN64FloatReg n64_ft = EN64FloatReg( ft ); EPspFloatReg psp_ft = EPspFloatReg( n64_ft );// 1:1 Mapping //u32 address = (u32)( gGPR[op_code.base]._s32_0 + (s32)(s16)op_code.immediate ); //value = Read32Bits(address); //gCPUState.FPU[op_code.ft]._s32_0 = value; #ifdef ENABLE_LWC1 //Since LW and LWC1 have same format we do a small hack that let us load directly to //the float reg without passing to the CPU first that saves us the MFC1 instruction //Corn GenerateLoad( address, (EPspReg)psp_ft, base, offset, OP_LWC1, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); UpdateFloatRegister( n64_ft ); #else // TODO: Actually perform LWC1 here EPspReg reg_dst( PspReg_V0 ); // GenerateLoad is slightly more efficient when using V0 GenerateLoad( address, reg_dst, base, offset, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); //SetVar( &gCPUState.FPU[ ft ]._u32, reg_dst ); MTC1( psp_ft, reg_dst ); UpdateFloatRegister( n64_ft ); #endif } // #ifdef ENABLE_LDC1 inline void CCodeGeneratorPSP::GenerateLDC1( u32 address, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset ) { EPspReg reg_base( GetRegisterAndLoadLo( base, PspReg_A0 ) ); ADDU( PspReg_A0, reg_base, gMemoryBaseReg ); EN64FloatReg n64_ft = EN64FloatReg( ft + 1 ); EPspFloatReg psp_ft = EPspFloatReg( n64_ft );// 1:1 Mapping //GenerateLoad( address, (EPspReg)psp_ft, base, offset, OP_LWC1, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); LWC1( psp_ft, PspReg_A0, offset); mRegisterCache.MarkFPAsValid( n64_ft, true ); mRegisterCache.MarkFPAsDirty( n64_ft, true ); n64_ft = EN64FloatReg( ft ); psp_ft = EPspFloatReg( n64_ft );// 1:1 Mapping //GenerateLoad( address, (EPspReg)psp_ft, base, offset + 4, OP_LWC1, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); LWC1( psp_ft, PspReg_A0, offset+4); mRegisterCache.MarkFPAsValid( n64_ft, true ); mRegisterCache.MarkFPAsDirty( n64_ft, true ); mRegisterCache.MarkFPAsSim( n64_ft, false ); } #endif #ifdef ENABLE_LWR_LWL //Unaligned load Left //Corn inline void CCodeGeneratorPSP::GenerateLWL( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset ) { //Will return the value in PspReg_V0 and the shift in PspReg_A3 GenerateLoad( address, PspReg_V0, base, offset, OP_LWL, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); EPspReg reg_dst( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SLL( PspReg_A3, PspReg_A3, 0x3 ); // shift *= 8 NOR( PspReg_A2, PspReg_R0, PspReg_R0 ); // Load 0xFFFFFFFF SLLV( PspReg_A2, PspReg_A3, PspReg_A2 ); // mask <<= shift NOR( PspReg_A2, PspReg_A2, PspReg_R0 ); // mask != mask SLLV( PspReg_A3, PspReg_A3, PspReg_V0 ); // memory <<= shift AND( reg_dst, reg_dst, PspReg_A2 ); // reg_dst & mask OR( reg_dst, reg_dst, PspReg_A3 ); // reg_dst | memory UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } //Unaligned load Right //Corn inline void CCodeGeneratorPSP::GenerateLWR( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset ) { //Will return the value in PspReg_V0 and the shift in PspReg_A3 GenerateLoad( address, PspReg_V0, base, offset, OP_LWL, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 ); EPspReg reg_dst( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); SLL( PspReg_A3, PspReg_A3, 0x3 ); // shift *= 8 ADDIU( PspReg_A2, PspReg_R0, 0xFF00 ); // Load 0xFFFFFF00 SLLV( PspReg_A2, PspReg_A3, PspReg_A2 ); // mask <<= shift AND( reg_dst, reg_dst, PspReg_A2 ); // reg_dst & mask XORI( PspReg_A3, PspReg_A3, 0x18 ); // shift ^= shift (eg. invert shift) SRLV( PspReg_A3, PspReg_A3, PspReg_V0 ); // memory >>= shift OR( reg_dst, reg_dst, PspReg_A3 ); // reg_dst | memory UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } #endif // inline void CCodeGeneratorPSP::GenerateSB( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value, base, offset, OP_SB, 3, set_branch_delay ? WriteBitsDirectBD_u8 : WriteBitsDirect_u8 ); } // inline void CCodeGeneratorPSP::GenerateSH( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value, base, offset, OP_SH, 2, set_branch_delay ? WriteBitsDirectBD_u16 : WriteBitsDirect_u16 ); } // inline void CCodeGeneratorPSP::GenerateSW( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value, base, offset, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); } // inline void CCodeGeneratorPSP::GenerateSD( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_value_hi( GetRegisterAndLoadHi( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value_hi, base, offset, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); //Adding 4 to offset should be quite safe //Corn EPspReg reg_value_lo( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value_lo, base, offset + 4, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); } // inline void CCodeGeneratorPSP::GenerateSWC1( u32 current_pc, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset ) { EN64FloatReg n64_ft = EN64FloatReg( ft ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) ); #ifdef ENABLE_SWC1 //Since SW and SWC1 have same format we do a small hack that let us save directly from //the float reg without passing to the CPU first that saves us the MFC1 instruction //Corn GenerateStore( current_pc, (EPspReg)psp_ft, base, offset, OP_SWC1, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); #else MFC1( PspReg_A1, psp_ft ); GenerateStore( current_pc, PspReg_A1, base, offset, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); #endif } // #ifdef ENABLE_SDC1 inline void CCodeGeneratorPSP::GenerateSDC1( u32 current_pc, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset ) { EN64FloatReg n64_ft_sig = EN64FloatReg( ft ); EPspFloatReg psp_ft_sig = GetFloatRegisterAndLoad( n64_ft_sig ); EN64FloatReg n64_ft = EN64FloatReg( ft + 1 ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( EN64FloatReg(n64_ft + 1) ) ); //GenerateStore( current_pc, (EPspReg)psp_ft, base, offset, OP_SWC1, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); //GenerateStore( current_pc, (EPspReg)psp_ft_sig, base, offset + 4, OP_SWC1, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); EPspReg reg_base( GetRegisterAndLoadLo( base, PspReg_A0 ) ); ADDU( PspReg_A0, reg_base, gMemoryBaseReg ); SWC1( psp_ft, PspReg_A0, offset); SWC1( psp_ft_sig, PspReg_A0, offset+4); } #endif // #ifdef ENABLE_SWR_SWL inline void CCodeGeneratorPSP::GenerateSWL( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value, base, offset, OP_SWL, 3, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); } // inline void CCodeGeneratorPSP::GenerateSWR( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset ) { EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) ); GenerateStore( current_pc, reg_value, base, offset, OP_SWR, 3, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 ); } #endif // inline void CCodeGeneratorPSP::GenerateMFC1( EN64Reg rt, u32 fs ) { //gGPR[ op_code.rt ]._s64 = (s64)(s32)gCPUState.FPU[fs]._s32_0 EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); EN64FloatReg n64_fs = EN64FloatReg( fs ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); MFC1( reg_dst, psp_fs ); //Needs to be sign extended(or breaks DKR) UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateMTC1( u32 fs, EN64Reg rt ) { //gCPUState.FPU[fs]._s32_0 = gGPR[ op_code.rt ]._s32_0; EPspReg psp_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); EN64FloatReg n64_fs = EN64FloatReg( fs ); EPspFloatReg psp_fs = EPspFloatReg( n64_fs );//1:1 Mapping MTC1( psp_fs, psp_rt ); UpdateFloatRegister( n64_fs ); } // inline void CCodeGeneratorPSP::GenerateCFC1( EN64Reg rt, u32 fs ) { if( (fs == 0) | (fs == 31) ) { EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); GetVar( reg_dst, &gCPUState.FPUControl[ fs ]._u32 ); #ifdef ENABLE_64BIT UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); #else StoreRegisterLo( rt, reg_dst ); #endif } } // // This breaks BombreMan Hero, not sure how to fix it though, maybe because we don't set the rounding mode? // http://forums.daedalusx64.com/viewtopic.php?f=77&t=2258&p=36374&hilit=GenerateCTC1#p36374 // void CCodeGeneratorPSP::GenerateCTC1( u32 fs, EN64Reg rt ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( fs == 31, "CTC1 register is invalid"); #endif EPspReg psp_rt_lo( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SetVar( &gCPUState.FPUControl[ fs ]._u32, psp_rt_lo ); // Only the lo part is ever set - Salvy //EPspReg psp_rt_hi( GetRegisterAndLoadHi( rt, PspReg_V0 ) ); //SetVar( &gCPUState.FPUControl[ fs ]._u32_1, psp_rt_hi ); //XXXX TODO: // Change the rounding mode } // inline void CCodeGeneratorPSP::GenerateBEQ( EN64Reg rs, EN64Reg rt, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BEQ?" ); #endif // One or other of these may be r0 - we don't really care for optimisation purposes though // as ultimately the register load regs factored out EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); // XXXX This may actually need to be a 64 bit compare, but this is what R4300.cpp does if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BNE( reg_a, reg_b, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BEQ( reg_a, reg_b, CCodeLabel(nullptr), true ); } } // inline void CCodeGeneratorPSP::GenerateBNE( EN64Reg rs, EN64Reg rt, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BNE?" ); #endif // One or other of these may be r0 - we don't really care for optimisation purposes though // as ultimately the register load regs factored out EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); EPspReg reg_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) ); // XXXX This may actually need to be a 64 bit compare, but this is what R4300.cpp does if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BEQ( reg_a, reg_b, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BNE( reg_a, reg_b, CCodeLabel(nullptr), true ); } } // inline void CCodeGeneratorPSP::GenerateBLEZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BLEZ?" ); #endif EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); // XXXX This may actually need to be a 64 bit compare, but this is what R4300.cpp does if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BGTZ( reg_a, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BLEZ( reg_a, CCodeLabel(nullptr), true ); } } // inline void CCodeGeneratorPSP::GenerateBGTZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BGTZ?" ); #endif EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); //64bit compare is needed for DK64 or DK can walk trough walls if(g_ROM.GameHacks == DK64) { #if 0 //Do a full 64 bit compare //Corn SRL( PspReg_V0, reg_lo, 1); //Free MSB sign bit ANDI( PspReg_V1, reg_lo, 1); //Extract LSB bit OR( PspReg_V0, PspReg_V0, PspReg_V1); //Merge with OR EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V1 ) ); OR( PspReg_V1, PspReg_V0, reg_hi); //Merge lo with hi part of register with sign bit #else //This takes some shortcuts //Corn EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V1 ) ); OR( PspReg_V1, reg_lo, reg_hi); #endif if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BLEZ( PspReg_V1, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BGTZ( PspReg_V1, CCodeLabel(nullptr), true ); } } else { //Do a fast 32 bit compare //Corn if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BLEZ( reg_lo, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BGTZ( reg_lo, CCodeLabel(nullptr), true ); } } } // inline void CCodeGeneratorPSP::GenerateBLTZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BLTZ?" ); #endif EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); // XXXX This should actually need to be a 64 bit compare??? if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BGEZ( reg_a, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BLTZ( reg_a, CCodeLabel(nullptr), true ); } } // inline void CCodeGeneratorPSP::GenerateBGEZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BGEZ?" ); #endif EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) ); // XXXX This should actually need to be a 64 bit compare??? if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BLTZ( reg_a, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BGEZ( reg_a, CCodeLabel(nullptr), true ); } } // inline void CCodeGeneratorPSP::GenerateBC1F( const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BC1F?" ); #endif //If compare was done in current fragment then use BC1T or BC1F directly //Corn if( mFloatCMPIsValid ) { if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BC1T( CCodeLabel(nullptr), true ); } else { *p_branch_jump = BC1F( CCodeLabel(nullptr), true ); } } else { GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 ); #if 1 EXT( PspReg_V0, PspReg_V0, 0, 23 ); //Extract condition bit (true/false) #else LoadConstant( PspReg_A0, FPCSR_C ); AND( PspReg_V0, PspReg_V0, PspReg_A0 ); #endif if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BNE( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BEQ( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true ); } } } // inline void CCodeGeneratorPSP::GenerateBC1T( const SBranchDetails * p_branch, CJumpLocation * p_branch_jump ) { #ifdef DAEDALUS_ENABLE_ASSERTS DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" ); DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BC1T?" ); #endif //If compare was done in current fragment then use BC1T or BC1F directly //Corn if( mFloatCMPIsValid ) { if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BC1F( CCodeLabel(nullptr), true ); } else { *p_branch_jump = BC1T( CCodeLabel(nullptr), true ); } } else { GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 ); #if 1 EXT( PspReg_V0, PspReg_V0, 0, 23 ); //Extract condition bit (true/false) #else LoadConstant( PspReg_A0, FPCSR_C ); AND( PspReg_V0, PspReg_V0, PspReg_A0 ); #endif if( p_branch->ConditionalBranchTaken ) { // Flip the sign of the test - *p_branch_jump = BEQ( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true ); } else { *p_branch_jump = BNE( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true ); } } } // inline void CCodeGeneratorPSP::GenerateADD_D_Sim( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) ); //Use float now instead of double :) ADD_S( psp_fd, psp_fs, psp_ft ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateSUB_D_Sim( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) ); //Use float now instead of double :) SUB_S( psp_fd, psp_fs, psp_ft ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateMUL_D_Sim( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) ); //Use float now instead of double :) MUL_S( psp_fd, psp_fs, psp_ft ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateDIV_D_Sim( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) ); //Use float now instead of double :) DIV_S( psp_fd, psp_fs, psp_ft ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateSQRT_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Use float now instead of double :) SQRT_S( psp_fd, psp_fs ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateABS_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Use float now instead of double :) ABS_S( psp_fd, psp_fs ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateMOV_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs_sig = EPspFloatReg( n64_fs );//1:1 Mapping if( mRegisterCache.IsFPSim( n64_fs ) ) { //Copy Sim double EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Move Sim double :) MOV_S( psp_fd, psp_fs ); MOV_S( psp_fd_sig, psp_fs_sig); //Copy signature as well UpdateSimDoubleRegister( n64_fd ); } else { //Copy true double! GetFloatRegisterAndLoad( n64_fs ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( EN64FloatReg(n64_fs + 1) ) ); //Move double :) MOV_S( psp_fd_sig, psp_fs_sig); //Lo part MOV_S( psp_fd, psp_fs ); //Hi part UpdateFloatRegister( n64_fd ); UpdateFloatRegister( EN64FloatReg(n64_fd + 1) ); } } // inline void CCodeGeneratorPSP::GenerateNEG_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Use float now instead of double :) NEG_S( psp_fd, psp_fs ); if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed UpdateSimDoubleRegister( n64_fd ); } //Convert Double to s32(TRUNC) inline void CCodeGeneratorPSP::GenerateTRUNC_W_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping //EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Use float now instead of double :) TRUNC_W_S( psp_fd_sig, psp_fs ); UpdateFloatRegister( n64_fd ); } //Convert Double to s32 inline void CCodeGeneratorPSP::GenerateCVT_W_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping //EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Use float now instead of double :) CVT_W_S( psp_fd_sig, psp_fs ); UpdateFloatRegister( n64_fd ); } //Convert Double to Float inline void CCodeGeneratorPSP::GenerateCVT_S_D_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping //EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); //Use float now instead of double :) MOV_S( psp_fd_sig, psp_fs ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateCMP_D_Sim( u32 fs, ECop1OpFunction cmp_op, u32 ft ) { mFloatCMPIsValid = true; EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) ); //Use float now instead of double :) CMP_S( psp_fs, cmp_op, psp_ft ); #if 1 //Improved version no branch //Corn GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 ); CFC1( PspReg_A0, (EPspFloatReg)31 ); EXT( PspReg_A0, PspReg_A0, 0, 23 ); //Extract condition bit (true/false) INS( PspReg_V0, PspReg_A0, 23, 23 ); //Insert condition bit (true/false) SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 ); #else //Improved version with only one branch //Corn GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 ); LoadConstant( PspReg_A0, FPCSR_C ); CJumpLocation test_condition( BC1T( CCodeLabel( nullptr ), false ) ); OR( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag |= c NOR( PspReg_A0, PspReg_A0, PspReg_V0 ); // c = !c AND( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag &= !c CCodeLabel condition_true( GetAssemblyBuffer()->GetLabel() ); SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 ); PatchJumpLong( test_condition, condition_true ); #endif } // inline void CCodeGeneratorPSP::GenerateADD_S( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? ADD_S( psp_fd, psp_fs, psp_ft ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateSUB_S( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg(n64_fd ); //1:1 Mapping EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? SUB_S( psp_fd, psp_fs, psp_ft ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateMUL_S( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? MUL_S( psp_fd, psp_fs, psp_ft ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateDIV_S( u32 fd, u32 fs, u32 ft ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? DIV_S( psp_fd, psp_fs, psp_ft ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateSQRT_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? SQRT_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateABS_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? ABS_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateMOV_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? MOV_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateNEG_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? NEG_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } //Convert Float to s32 inline void CCodeGeneratorPSP::GenerateTRUNC_W_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? TRUNC_W_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } //Convert Float to s32 inline void CCodeGeneratorPSP::GenerateFLOOR_W_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? FLOOR_W_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } //Convert Float to s32 inline void CCodeGeneratorPSP::GenerateCVT_W_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? CVT_W_S( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } //Convert s32 to (simulated)Double inline void CCodeGeneratorPSP::GenerateCVT_D_W_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); CVT_S_W( psp_fd, psp_fs ); LoadConstant( PspReg_A0, SIMULATESIG ); //Get signature MTC1( psp_fd_sig , PspReg_A0 ); //Write signature to float reg UpdateSimDoubleRegister( n64_fd ); } //Convert Float to (simulated)Double inline void CCodeGeneratorPSP::GenerateCVT_D_S_Sim( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); MOV_S( psp_fd, psp_fs ); LoadConstant( PspReg_A0, SIMULATESIG ); //Get signature MTC1( psp_fd_sig , PspReg_A0 ); //Write signature to float reg //SetFloatVar( &gCPUState.FPU[fd + 0]._f32, psp_fd_sig ); //SetFloatVar( &gCPUState.FPU[fd + 1]._f32, psp_fd ); UpdateSimDoubleRegister( n64_fd ); } //Convert Float to Double inline void CCodeGeneratorPSP::GenerateCVT_D_S( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); JAL( CCodeLabel( reinterpret_cast< const void * >( _FloatToDouble ) ), false ); //Convert Float to Double MFC1( PspReg_A0, psp_fs ); //Get float to convert MTC1( psp_fd_sig , PspReg_V0 ); //Copy converted Double lo to FPU MTC1( psp_fd , PspReg_V1 ); //Copy converted Double hi to FPU mRegisterCache.MarkFPAsValid( n64_fd, true ); mRegisterCache.MarkFPAsDirty( n64_fd, true ); mRegisterCache.MarkFPAsSim( n64_fd, false ); //Dont flag as simulated its a real Double!!! mRegisterCache.MarkFPAsValid( EN64FloatReg(n64_fd + 1), true ); mRegisterCache.MarkFPAsDirty( EN64FloatReg(n64_fd + 1), true ); } // inline void CCodeGeneratorPSP::GenerateCMP_S( u32 fs, ECop1OpFunction cmp_op, u32 ft ) { mFloatCMPIsValid = true; EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_ft = EN64FloatReg( ft ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) ); CMP_S( psp_fs, cmp_op, psp_ft ); #if 1 //Improved version no branch //Corn GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 ); CFC1( PspReg_A0, (EPspFloatReg)31 ); EXT( PspReg_A0, PspReg_A0, 0, 23 ); //Extract condition bit (true/false) INS( PspReg_V0, PspReg_A0, 23, 23 ); //Insert condition bit (true/false) SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 ); #else //Improved version with only one branch //Corn GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 ); LoadConstant( PspReg_A0, FPCSR_C ); CJumpLocation test_condition( BC1T( CCodeLabel( nullptr ), false ) ); OR( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag |= c NOR( PspReg_A0, PspReg_A0, PspReg_R0 ); // c = !c AND( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag &= !c CCodeLabel condition_true( GetAssemblyBuffer()->GetLabel() ); SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 ); PatchJumpLong( test_condition, condition_true ); #endif } // inline void CCodeGeneratorPSP::GenerateADD_D( u32 fd, u32 fs, u32 ft ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } // inline void CCodeGeneratorPSP::GenerateSUB_D( u32 fd, u32 fs, u32 ft ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } // inline void CCodeGeneratorPSP::GenerateMUL_D( u32 fd, u32 fs, u32 ft ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } // inline void CCodeGeneratorPSP::GenerateDIV_D( u32 fd, u32 fs, u32 ft ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } // inline void CCodeGeneratorPSP::GenerateSQRT_D( u32 fd, u32 fs ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } // inline void CCodeGeneratorPSP::GenerateMOV_D( u32 fd, u32 fs ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } // inline void CCodeGeneratorPSP::GenerateNEG_D( u32 fd, u32 fs ) { #ifdef DAEDALUS_DEBUG_CONSOLE NOT_IMPLEMENTED( __FUNCTION__ ); #endif } //Convert s32 to Float inline void CCodeGeneratorPSP::GenerateCVT_S_W( u32 fd, u32 fs ) { EN64FloatReg n64_fs = EN64FloatReg( fs ); EN64FloatReg n64_fd = EN64FloatReg( fd ); EPspFloatReg psp_fd = EPspFloatReg( n64_fd ); EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) ); //SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed? CVT_S_W( psp_fd, psp_fs ); UpdateFloatRegister( n64_fd ); } // inline void CCodeGeneratorPSP::GenerateMFC0( EN64Reg rt, u32 fs ) { #ifdef DAEDALUS_ENABLE_ASSERTS // Never seen this to happen, no reason to bother to handle it DAEDALUS_ASSERT( fs != C0_RAND, "Reading MFC0 random register is unhandled"); #endif EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); GetVar( reg_dst, &gCPUState.CPUControl[ fs ]._u32 ); UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND ); } // inline void CCodeGeneratorPSP::GenerateMTC0( EN64Reg rt, u32 fs ) { EPspReg reg_src( GetRegisterAndLoadLo( rt, PspReg_V0 ) ); SetVar( &gCPUState.CPUControl[ fs ]._u32, reg_src ); }