// daedalus/Source/SysPSP/DynaRec/CodeGeneratorPSP.cpp
/*
Copyright (C) 2005 StrmnNrmn
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "BuildOptions.h"
#include "Base/Types.h"
#include <limits.h>
#include <stdio.h>
#include <algorithm>
#include "Config/ConfigOptions.h"
#include "Core/CPU.h"
#include "Core/Memory.h"
#include "Core/R4300.h"
#include "Core/Registers.h"
#include "Core/ROM.h"
#include "Debug/DBGConsole.h"
#include "DynaRec/AssemblyUtils.h"
#include "DynaRec/Trace.h"
#include "Base/MathUtil.h"
#include "Ultra/ultra_R4300.h"
#include "SysPSP/DynaRec/CodeGeneratorPSP.h"
#include "SysPSP/DynaRec/N64RegisterCachePSP.h"
#include "Base/Macros.h"
#include "Core/PrintOpCode.h"
#include "Utility/Profiler.h"
using namespace AssemblyUtils;
//Enable unaligned load/store (used in CBFD, OOT, Rayman 2 and PD) //Corn
#define ENABLE_LWR_LWL
//#define ENABLE_SWR_SWL
//Enable loading/storing floats directly to/from the FPU //Corn
#define ENABLE_LWC1
#define ENABLE_SWC1
//#define ENABLE_LDC1
//#define ENABLE_SDC1
//Define to handle full 64-bit checks for SLT, SLTU, SLTI & SLTIU //Corn
//#define ENABLE_64BIT
//Define to check for DIV by 0 //Corn
//#define DIVZEROCHK
//Define to enable exceptions for interpreter calls from the dynarec
//#define ALLOW_INTERPRETER_EXCEPTIONS
#define NOT_IMPLEMENTED( x ) DAEDALUS_ERROR( x )
extern "C" { const void * g_MemoryLookupTableReadForDynarec = g_MemoryLookupTableRead; } //Important pointer for Dynarec see DynaRecStubs.s
extern "C" { void _DDIV( s64 Num, s32 Div ); } //signed 64bit division //Corn
extern "C" { void _DDIVU( u64 Num, u32 Div ); } //unsigned 64bit division //Corn
extern "C" { void _DMULTU( u64 A, u64 B ); } //unsigned 64bit multiply //Corn
extern "C" { void _DMULT( s64 A, s64 B ); } //signed 64bit multiply (result is 64bit not 128bit!) //Corn
extern "C" { u64 _FloatToDouble( u32 _float); } //Uses CPU to pass f64/32 thats why its maskerading as u64/32 //Corn
extern "C" { u32 _DoubleToFloat( u64 _double); } //Uses CPU to pass f64/32 thats why its maskerading as u64/32 //Corn
extern "C" { void _ReturnFromDynaRec(); }
extern "C" { void _DirectExitCheckNoDelay( u32 instructions_executed, u32 exit_pc ); }
extern "C" { void _DirectExitCheckDelay( u32 instructions_executed, u32 exit_pc, u32 target_pc ); }
extern "C" { void _IndirectExitCheck( u32 instructions_executed, CIndirectExitMap * p_map, u32 target_pc ); }
extern "C"
{
u32 _ReadBitsDirect_u8( u32 address, u32 current_pc );
u32 _ReadBitsDirect_s8( u32 address, u32 current_pc );
u32 _ReadBitsDirect_u16( u32 address, u32 current_pc );
u32 _ReadBitsDirect_s16( u32 address, u32 current_pc );
u32 _ReadBitsDirect_u32( u32 address, u32 current_pc );
u32 _ReadBitsDirectBD_u8( u32 address, u32 current_pc );
u32 _ReadBitsDirectBD_s8( u32 address, u32 current_pc );
u32 _ReadBitsDirectBD_u16( u32 address, u32 current_pc );
u32 _ReadBitsDirectBD_s16( u32 address, u32 current_pc );
u32 _ReadBitsDirectBD_u32( u32 address, u32 current_pc );
// Dynarec calls this for simplicity
void Write32BitsForDynaRec( u32 address, u32 value )
{
Write32Bits_NoSwizzle( address, value );
}
void Write16BitsForDynaRec( u32 address, u16 value )
{
Write16Bits_NoSwizzle( address, value );
}
void Write8BitsForDynaRec( u32 address, u8 value )
{
Write8Bits_NoSwizzle( address, value );
}
void _WriteBitsDirect_u32( u32 address, u32 value, u32 current_pc );
void _WriteBitsDirect_u16( u32 address, u32 value, u32 current_pc ); // Value in low 16 bits
void _WriteBitsDirect_u8( u32 address, u32 value, u32 current_pc ); // Value in low 8 bits
void _WriteBitsDirectBD_u32( u32 address, u32 value, u32 current_pc );
void _WriteBitsDirectBD_u16( u32 address, u32 value, u32 current_pc ); // Value in low 16 bits
void _WriteBitsDirectBD_u8( u32 address, u32 value, u32 current_pc ); // Value in low 8 bits
}
#define ReadBitsDirect_u8 _ReadBitsDirect_u8
#define ReadBitsDirect_s8 _ReadBitsDirect_s8
#define ReadBitsDirect_u16 _ReadBitsDirect_u16
#define ReadBitsDirect_s16 _ReadBitsDirect_s16
#define ReadBitsDirect_u32 _ReadBitsDirect_u32
#define ReadBitsDirectBD_u8 _ReadBitsDirectBD_u8
#define ReadBitsDirectBD_s8 _ReadBitsDirectBD_s8
#define ReadBitsDirectBD_u16 _ReadBitsDirectBD_u16
#define ReadBitsDirectBD_s16 _ReadBitsDirectBD_s16
#define ReadBitsDirectBD_u32 _ReadBitsDirectBD_u32
#define WriteBitsDirect_u32 _WriteBitsDirect_u32
#define WriteBitsDirect_u16 _WriteBitsDirect_u16
#define WriteBitsDirect_u8 _WriteBitsDirect_u8
#define WriteBitsDirectBD_u32 _WriteBitsDirectBD_u32
#define WriteBitsDirectBD_u16 _WriteBitsDirectBD_u16
#define WriteBitsDirectBD_u8 _WriteBitsDirectBD_u8
//Used to print a value from ASM - just add these two ops (all caller-saved regs will be saved, including RA and A0) //Corn
// jal _printf_asm
// lw $a0, _Delay($fp) #ex: prints Delay value
// OR ADD
// JAL( CCodeLabel( (void*)_printf_asm ), false );
// OR( PspReg_A0, dst_reg, PspReg_R0 );
extern "C" { void _printf_asm( u32 val ); }
extern "C" { void output_extern( u32 val ) { printf("%d\n", val); } }
extern "C" { void _ReturnFromDynaRecIfStuffToDo( u32 register_mask ); }
extern "C" { void _DaedalusICacheInvalidate( const void * address, u32 length ); }
bool gHaveSavedPatchedOps = false;
PspOpCode gOriginalOps[2];
PspOpCode gReplacementOps[2];
#define URO_HI_SIGN_EXTEND 0 // Sign extend from src
#define URO_HI_CLEAR 1 // Clear hi bits
void Dynarec_ClearedCPUStuffToDo()
{
// Replace first two ops of _ReturnFromDynaRecIfStuffToDo with 'jr ra, nop'
u8 * p_void_function( reinterpret_cast< u8 * >( _ReturnFromDynaRecIfStuffToDo ) );
PspOpCode * p_function_address = reinterpret_cast< PspOpCode * >( make_uncached_ptr( p_void_function ) );
if(!gHaveSavedPatchedOps)
{
gOriginalOps[0] = p_function_address[0];
gOriginalOps[1] = p_function_address[1];
PspOpCode op_code;
op_code._u32 = 0;
op_code.op = OP_SPECOP;
op_code.spec_op = SpecOp_JR;
op_code.rs = PspReg_RA;
gReplacementOps[0] = op_code; // JR RA
gReplacementOps[1]._u32 = 0; // NOP
gHaveSavedPatchedOps = true;
}
p_function_address[0] = gReplacementOps[0];
p_function_address[1] = gReplacementOps[1];
const u8 * p_lower( RoundPointerDown( p_void_function, 64 ) );
const u8 * p_upper( RoundPointerUp( p_void_function + 8, 64 ) );
const u32 size( p_upper - p_lower);
//sceKernelDcacheWritebackRange( p_lower, size );
//sceKernelIcacheInvalidateRange( p_lower, size );
_DaedalusICacheInvalidate( p_lower, size );
}
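// After patching, the prologue of _ReturnFromDynaRecIfStuffToDo is effectively:
// jr $ra ; return to the generated code immediately
// nop ; branch delay slot
// which turns the call into a near no-op until Dynarec_SetCPUStuffToDo()
// puts the original two ops back.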
void Dynarec_SetCPUStuffToDo()
{
// Restore first two ops of _ReturnFromDynaRecIfStuffToDo
u8 * p_void_function( reinterpret_cast< u8 * >( _ReturnFromDynaRecIfStuffToDo ) );
PspOpCode * p_function_address = reinterpret_cast< PspOpCode * >( make_uncached_ptr( p_void_function ) );
p_function_address[0] = gOriginalOps[0];
p_function_address[1] = gOriginalOps[1];
const u8 * p_lower( RoundPointerDown( p_void_function, 64 ) );
const u8 * p_upper( RoundPointerUp( p_void_function + 8, 64 ) );
const u32 size( p_upper - p_lower);
//sceKernelDcacheWritebackRange( p_lower, size );
//sceKernelIcacheInvalidateRange( p_lower, size );
_DaedalusICacheInvalidate( p_lower, size );
}
extern "C"
{
void HandleException_extern()
{
switch (gCPUState.Delay)
{
case DO_DELAY:
gCPUState.Delay = EXEC_DELAY; //fall through to PC +=4
case NO_DELAY:
gCPUState.CurrentPC += 4;
break;
case EXEC_DELAY:
gCPUState.CurrentPC = gCPUState.TargetPC;
gCPUState.Delay = NO_DELAY;
break;
default:
NODEFAULT;
}
}
}
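// Summary of the delay-state fixup performed above when an exception is taken:
// DO_DELAY -> EXEC_DELAY, PC += 4 (exception raised by the branch op itself)
// NO_DELAY -> PC += 4 (plain op - resume at the following op)
// EXEC_DELAY -> NO_DELAY, PC = TargetPC (delay slot executed - take the branch)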
namespace
{
const EPspReg gMemUpperBoundReg = PspReg_S6;
const EPspReg gMemoryBaseReg = PspReg_S7;
const EPspReg gRegistersToUseForCaching[] =
{
// PspReg_V0, // Used as calculation temp
// PspReg_V1, // Used as calculation temp
// PspReg_A0, // Used as calculation temp
// PspReg_A1, // Used as calculation temp
// PspReg_A2, // Used as calculation temp
// PspReg_A3, // Used as calculation temp
PspReg_AT,
PspReg_T0,
PspReg_T1,
PspReg_T2,
PspReg_T3,
PspReg_T4,
PspReg_T5,
PspReg_T6,
PspReg_T7,
PspReg_T8,
PspReg_T9,
PspReg_S0,
PspReg_S1,
PspReg_S2,
PspReg_S3,
PspReg_S4,
PspReg_S5,
// PspReg_S6, // Memory upper bound
// PspReg_S7, // Used for g_pu8RamBase - 0x80000000
// PspReg_S8, // Used for base register (&gCPUState)
// PspReg_K0, //Used as load base register. Normally it is reserved for Kernel but seems to work if we borrow it...(could come back and bite us if we use kernel stuff?) //Corn
// PspReg_K1, //Used as store base register. Normally it is reserved for Kernel but seems to work if we borrow it...(could come back and bite us if we use kernel stuff?)
// PspReg_GP, //This needs saving to work?
};
}
//u32 gTotalRegistersCached = 0;
//u32 gTotalRegistersUncached = 0;
//
CCodeGeneratorPSP::CCodeGeneratorPSP( CAssemblyBuffer * p_buffer_a, CAssemblyBuffer * p_buffer_b )
: CCodeGenerator( )
, CAssemblyWriterPSP( p_buffer_a, p_buffer_b )
, mpBasePointer( nullptr )
, mBaseRegister( PspReg_S8 ) // TODO
, mEntryAddress( 0 )
, mLoopTop( nullptr )
, mUseFixedRegisterAllocation( false )
{
}
//
void CCodeGeneratorPSP::Initialise( u32 entry_address, u32 exit_address, u32 * hit_counter, const void * p_base, const SRegisterUsageInfo & register_usage )
{
mEntryAddress = entry_address;
mpBasePointer = reinterpret_cast< const u8 * >( p_base );
SetRegisterSpanList( register_usage, entry_address == exit_address );
mPreviousLoadBase = N64Reg_R0; //Invalidate
mPreviousStoreBase = N64Reg_R0; //Invalidate
mFloatCMPIsValid = false;
mMultIsValid = false;
if( hit_counter != nullptr )
{
GetVar( PspReg_V0, hit_counter );
ADDIU( PspReg_V0, PspReg_V0, 1 );
SetVar( hit_counter, PspReg_V0 );
}
}
//
void CCodeGeneratorPSP::Finalise( ExceptionHandlerFn p_exception_handler_fn, const std::vector< CJumpLocation > & exception_handler_jumps )
{
// We handle exceptions directly with _ReturnFromDynaRecIfStuffToDo - we should never get here on the PSP
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( exception_handler_jumps.empty(), "Not expecting to have any exception handler jumps to process" );
#endif
GenerateAddressCheckFixups();
CAssemblyWriterPSP::Finalise();
}
//
void CCodeGeneratorPSP::SetRegisterSpanList( const SRegisterUsageInfo & register_usage, bool loops_to_self )
{
mRegisterSpanList = register_usage.SpanList;
// Sort in order of increasing start point
std::sort( mRegisterSpanList.begin(), mRegisterSpanList.end(), SAscendingSpanStartSort() );
const u32 NUM_CACHE_REGS( sizeof(gRegistersToUseForCaching) / sizeof(gRegistersToUseForCaching[0]) );
// Push all the available registers in reverse order (i.e. use temporaries later)
// Use temporaries first so we can avoid flushing them in case of a function call //Corn
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( mAvailableRegisters.empty(), "Why isn't the available register list empty?" );
#endif
for( u32 i {0}; i < NUM_CACHE_REGS; i++ )
{
mAvailableRegisters.push( gRegistersToUseForCaching[ i ] );
}
// Optimization for self looping code
if( gDynarecLoopOptimisation & loops_to_self )
{
mUseFixedRegisterAllocation = true;
u32 cache_reg_idx( 0 );
u32 HiLo {0};
while ( HiLo<2 ) // If there are still unused registers, assign to high part of reg
{
RegisterSpanList::const_iterator span_it = mRegisterSpanList.begin();
while( span_it < mRegisterSpanList.end() )
{
const SRegisterSpan & span( *span_it );
if( cache_reg_idx < NUM_CACHE_REGS )
{
EPspReg cachable_reg( gRegistersToUseForCaching[cache_reg_idx] );
mRegisterCache.SetCachedReg( span.Register, HiLo, cachable_reg );
cache_reg_idx++;
}
++span_it;
}
++HiLo;
}
//
// Pull all the cached registers in from memory
//
// Skip r0
u32 i {1};
while( i < NUM_N64_REGS )
{
EN64Reg n64_reg = EN64Reg( i );
u32 lo_hi_idx {};
while( lo_hi_idx < 2)
{
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
PrepareCachedRegister( n64_reg, lo_hi_idx );
//
// If the register is modified anywhere in the fragment, we need
// to mark it as dirty so it's flushed correctly on exit.
//
if( register_usage.IsModified( n64_reg ) )
{
mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true );
}
}
++lo_hi_idx;
}
++i;
}
mLoopTop = GetAssemblyBuffer()->GetLabel();
} //End of Loop optimization code
}
//
void CCodeGeneratorPSP::ExpireOldIntervals( u32 instruction_idx )
{
// mActiveIntervals is held in order of increasing end point
RegisterSpanList::iterator span_it = mActiveIntervals.begin();
while( span_it != mActiveIntervals.end() )
{
const SRegisterSpan & span( *span_it );
if( span.SpanEnd >= instruction_idx )
{
break;
}
// This interval is no longer active - flush the register and return it to the list of available regs
EPspReg psp_reg( mRegisterCache.GetCachedReg( span.Register, 0 ) );
FlushRegister( mRegisterCache, span.Register, 0, true );
mRegisterCache.ClearCachedReg( span.Register, 0 );
mAvailableRegisters.push( psp_reg );
// NB: erase() already yields the iterator to the next interval, so don't
// advance it again - doing so would skip every other expired interval
span_it = mActiveIntervals.erase( span_it );
}
}
//
void CCodeGeneratorPSP::SpillAtInterval( const SRegisterSpan & live_span )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( !mActiveIntervals.empty(), "There are no active intervals" );
#endif
const SRegisterSpan & last_span( mActiveIntervals.back() ); // Spill the last active interval (it has the greatest end point)
if( last_span.SpanEnd > live_span.SpanEnd )
{
// Uncache the old span
EPspReg psp_reg( mRegisterCache.GetCachedReg( last_span.Register, 0 ) );
FlushRegister( mRegisterCache, last_span.Register, 0, true );
mRegisterCache.ClearCachedReg( last_span.Register, 0 );
// Cache the new span
mRegisterCache.SetCachedReg( live_span.Register, 0, psp_reg );
mActiveIntervals.pop_back(); // Remove the last span
mActiveIntervals.push_back( live_span ); // Insert in order of increasing end point
std::sort( mActiveIntervals.begin(), mActiveIntervals.end(), SAscendingSpanEndSort() ); // XXXX - will be quicker to insert in the correct place rather than sorting each time
}
else
{
// There is no space for this register - we just don't update the register cache info, so we save/restore it from memory as needed
}
}
//
void CCodeGeneratorPSP::UpdateRegisterCaching( u32 instruction_idx )
{
if( !mUseFixedRegisterAllocation )
{
ExpireOldIntervals( instruction_idx );
for(RegisterSpanList::const_iterator span_it = mRegisterSpanList.begin(); span_it < mRegisterSpanList.end(); ++span_it )
{
const SRegisterSpan & span( *span_it );
// As we keep the intervals sorted in order of SpanStart, we can exit as soon as we encounter a SpanStart in the future
if( instruction_idx < span.SpanStart )
{
break;
}
// Only process live intervals
if( (instruction_idx >= span.SpanStart) & (instruction_idx <= span.SpanEnd) )
{
if( !mRegisterCache.IsCached( span.Register, 0 ) )
{
if( mAvailableRegisters.empty() )
{
SpillAtInterval( span );
}
else
{
// Use this register for caching
mRegisterCache.SetCachedReg( span.Register, 0, mAvailableRegisters.top() );
// Pop this register from the available list
mAvailableRegisters.pop();
mActiveIntervals.push_back( span ); // Insert in order of increasing end point
std::sort( mActiveIntervals.begin(), mActiveIntervals.end(), SAscendingSpanEndSort() ); // XXXX - will be quicker to insert in the correct place rather than sorting each time
}
}
}
}
}
}
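// NB: ExpireOldIntervals/SpillAtInterval/UpdateRegisterCaching together form a
// linear-scan register allocator over the precomputed register spans: spans are
// visited in order of increasing start, expired spans return their PSP register
// to the available pool, and when the pool is empty the active span with the
// furthest end point is evicted (if it ends later than the incoming span).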
//
RegisterSnapshotHandle CCodeGeneratorPSP::GetRegisterSnapshot()
{
RegisterSnapshotHandle handle( mRegisterSnapshots.size() );
mRegisterSnapshots.push_back( mRegisterCache );
return handle;
}
//
CCodeLabel CCodeGeneratorPSP::GetEntryPoint() const
{
return GetAssemblyBuffer()->GetStartAddress();
}
//
CCodeLabel CCodeGeneratorPSP::GetCurrentLocation() const
{
return GetAssemblyBuffer()->GetLabel();
}
//
u32 CCodeGeneratorPSP::GetCompiledCodeSize() const
{
return GetAssemblyBuffer()->GetSize();
}
//Get a (cached) N64 register mapped to a PSP register (useful for dst register)
EPspReg CCodeGeneratorPSP::GetRegisterNoLoad( EN64Reg n64_reg, u32 lo_hi_idx, EPspReg scratch_reg )
{
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
return mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx );
}
else
{
return scratch_reg;
}
}
//Load value from an emulated N64 register or known value to a PSP register
void CCodeGeneratorPSP::GetRegisterValue( EPspReg psp_reg, EN64Reg n64_reg, u32 lo_hi_idx )
{
if( mRegisterCache.IsKnownValue( n64_reg, lo_hi_idx ) )
{
//printf( "Loading %s[%d] <- %08x\n", RegNames[ n64_reg ], lo_hi_idx, mRegisterCache.GetKnownValue( n64_reg, lo_hi_idx ) );
LoadConstant( psp_reg, mRegisterCache.GetKnownValue( n64_reg, lo_hi_idx )._s32 );
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true );
mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true );
mRegisterCache.ClearKnownValue( n64_reg, lo_hi_idx );
}
}
else
{
GetVar( psp_reg, lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0 );
}
}
//Get a (cached) N64 register value mapped to a PSP register (or scratch reg)
//and also load the value if not loaded yet (useful for src register)
EPspReg CCodeGeneratorPSP::GetRegisterAndLoad( EN64Reg n64_reg, u32 lo_hi_idx, EPspReg scratch_reg )
{
EPspReg reg;
bool need_load( false );
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
// gTotalRegistersCached++;
reg = mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx );
// We're loading it below, so set the valid flag
if( !mRegisterCache.IsValid( n64_reg, lo_hi_idx ) )
{
need_load = true;
mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true );
}
}
else if( n64_reg == N64Reg_R0 )
{
reg = PspReg_R0;
}
else
{
// gTotalRegistersUncached++;
reg = scratch_reg;
need_load = true;
}
if( need_load )
{
GetRegisterValue( reg, n64_reg, lo_hi_idx );
}
return reg;
}
// Similar to GetRegisterAndLoad, but ALWAYS loads into the specified psp register
void CCodeGeneratorPSP::LoadRegister( EPspReg psp_reg, EN64Reg n64_reg, u32 lo_hi_idx )
{
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
EPspReg cached_reg( mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ) );
// gTotalRegistersCached++;
// Load the register if it's currently invalid
if( !mRegisterCache.IsValid( n64_reg,lo_hi_idx ) )
{
GetRegisterValue( cached_reg, n64_reg, lo_hi_idx );
mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true );
}
// Copy the register if necessary
if( psp_reg != cached_reg )
{
OR( psp_reg, cached_reg, PspReg_R0 );
}
}
else if( n64_reg == N64Reg_R0 )
{
OR( psp_reg, PspReg_R0, PspReg_R0 );
}
else
{
// gTotalRegistersUncached++;
GetRegisterValue( psp_reg, n64_reg, lo_hi_idx );
}
}
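// NB: OR( rd, rs, PspReg_R0 ) as used above is the canonical MIPS register-move
// idiom ("move rd, rs"), since R0 always reads as zero.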
// This function pulls in a cached register so that it can be used at a later point.
// This is usually done when we have a branching instruction - it guarantees that
// the register is valid regardless of whether or not the branch is taken.
void CCodeGeneratorPSP::PrepareCachedRegister( EN64Reg n64_reg, u32 lo_hi_idx )
{
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
EPspReg cached_reg( mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ) );
// Load the register if it's currently invalid
if( !mRegisterCache.IsValid( n64_reg,lo_hi_idx ) )
{
GetRegisterValue( cached_reg, n64_reg, lo_hi_idx );
mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true );
}
}
}
//
void CCodeGeneratorPSP::StoreRegister( EN64Reg n64_reg, u32 lo_hi_idx, EPspReg psp_reg )
{
mRegisterCache.ClearKnownValue( n64_reg, lo_hi_idx );
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
EPspReg cached_reg( mRegisterCache.GetCachedReg( n64_reg, lo_hi_idx ) );
// gTotalRegistersCached++;
// Update our copy as necessary
if( psp_reg != cached_reg )
{
OR( cached_reg, psp_reg, PspReg_R0 );
}
mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true );
mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, true );
}
else
{
// gTotalRegistersUncached++;
SetVar( lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0, psp_reg );
mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, false );
}
}
//
void CCodeGeneratorPSP::SetRegister64( EN64Reg n64_reg, s32 lo_value, s32 hi_value )
{
SetRegister( n64_reg, 0, lo_value );
SetRegister( n64_reg, 1, hi_value );
}
// Set the low 32 bits of a register to a known value (and hence the upper
// 32 bits are also known through sign extension)
inline void CCodeGeneratorPSP::SetRegister32s( EN64Reg n64_reg, s32 value )
{
//SetRegister64( n64_reg, value, value >= 0 ? 0 : 0xffffffff );
SetRegister64( n64_reg, value, value >> 31 );
}
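// e.g. value = 0x00001234: 0x00001234 >> 31 == 0x00000000 (upper word clear)
// value = 0x87654321: (s32)0x87654321 >> 31 == 0xFFFFFFFF (upper word set)
// i.e. the arithmetic shift replicates the sign bit across the upper 32 bits.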
//
inline void CCodeGeneratorPSP::SetRegister( EN64Reg n64_reg, u32 lo_hi_idx, u32 value )
{
mRegisterCache.SetKnownValue( n64_reg, lo_hi_idx, value );
mRegisterCache.MarkAsDirty( n64_reg, lo_hi_idx, true );
if( mRegisterCache.IsCached( n64_reg, lo_hi_idx ) )
{
mRegisterCache.MarkAsValid( n64_reg, lo_hi_idx, false ); // The actual cache is invalid though!
}
}
//
void CCodeGeneratorPSP::UpdateRegister( EN64Reg n64_reg, EPspReg psp_reg, bool options )
{
//if(n64_reg == N64Reg_R0) return; //Try to modify R0!!!
StoreRegisterLo( n64_reg, psp_reg );
//Skip storing sign extension on some regs //Corn
if( N64Reg_DontNeedSign( n64_reg ) ) return;
if( options == URO_HI_SIGN_EXTEND )
{
EPspReg scratch_reg = PspReg_V0;
if( mRegisterCache.IsCached( n64_reg, 1 ) )
{
scratch_reg = mRegisterCache.GetCachedReg( n64_reg, 1 );
}
SRA( scratch_reg, psp_reg, 0x1f ); // Sign extend
StoreRegisterHi( n64_reg, scratch_reg );
}
else // == URO_HI_CLEAR
{
SetRegister( n64_reg, 1, 0 );
}
}
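// Illustrative use: after computing a 32-bit result in V0,
// UpdateRegister( rd, PspReg_V0, URO_HI_SIGN_EXTEND ); // e.g. ADDU - hi word is the sign of lo
// UpdateRegister( rt, PspReg_V0, URO_HI_CLEAR ); // e.g. ANDI - hi word is always zero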
//
EPspFloatReg CCodeGeneratorPSP::GetFloatRegisterAndLoad( EN64FloatReg n64_reg )
{
EPspFloatReg psp_reg = EPspFloatReg( n64_reg ); // 1:1 mapping
if( !mRegisterCache.IsFPValid( n64_reg ) )
{
GetFloatVar( psp_reg, &gCPUState.FPU[n64_reg]._f32 );
mRegisterCache.MarkFPAsValid( n64_reg, true );
}
mRegisterCache.MarkFPAsSim( n64_reg, false );
return psp_reg;
}
//
inline void CCodeGeneratorPSP::UpdateFloatRegister( EN64FloatReg n64_reg )
{
mRegisterCache.MarkFPAsDirty( n64_reg, true );
mRegisterCache.MarkFPAsValid( n64_reg, true );
mRegisterCache.MarkFPAsSim( n64_reg, false );
}
//Get Double or SimDouble and add code to convert to a simulated double if needed //Corn
#define SIMULATESIG 0x1234 //Small signature so the value can be loaded with one op
EPspFloatReg CCodeGeneratorPSP::GetSimFloatRegisterAndLoad( EN64FloatReg n64_reg )
{
EPspFloatReg psp_reg_sig = EPspFloatReg( n64_reg ); // 1:1 mapping
EPspFloatReg psp_reg = EPspFloatReg(n64_reg + 1);
//This is Double Lo or signature
if( !mRegisterCache.IsFPValid( n64_reg ) )
{
GetFloatVar( psp_reg_sig, &gCPUState.FPU[n64_reg]._f32 );
mRegisterCache.MarkFPAsValid( n64_reg, true );
}
//This is Double Hi or f32
if( !mRegisterCache.IsFPValid( EN64FloatReg(n64_reg + 1) ) )
{
GetFloatVar( psp_reg, &gCPUState.FPU[n64_reg+1]._f32 );
mRegisterCache.MarkFPAsValid( EN64FloatReg(n64_reg+1), true );
}
//If the register is not a SimDouble yet (or unknown), add the check-and-convert routine
if( !mRegisterCache.IsFPSim( n64_reg ) )
{
MFC1( PspReg_A0, psp_reg_sig ); //Get lo part of double
LoadConstant( PspReg_A1, SIMULATESIG ); //Get signature
CJumpLocation test_reg( BEQ( PspReg_A0, PspReg_A1, CCodeLabel(nullptr), false ) ); //compare float to signature
MTC1( psp_reg_sig , PspReg_A1 ); //Always write back signature to float reg
JAL( CCodeLabel( reinterpret_cast< const void * >( _DoubleToFloat ) ), false ); //Convert Double to Float
MFC1( PspReg_A1, psp_reg ); //Get hi part of double
MTC1( psp_reg , PspReg_V0 ); //store converted float
PatchJumpLong( test_reg, GetAssemblyBuffer()->GetLabel() );
mRegisterCache.MarkFPAsDirty( n64_reg, true );
mRegisterCache.MarkFPAsSim( n64_reg, true );
mRegisterCache.MarkFPAsDirty( EN64FloatReg(n64_reg + 1), true );
}
return psp_reg;
}
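// The 'simulated double' scheme: the f32 approximation of the N64 double lives
// in the odd register of the pair, while the even register holds SIMULATESIG as
// a marker. The code above compares the lo word against the signature - if it
// differs, the pair still holds a genuine IEEE double, so _DoubleToFloat is
// called to collapse it into the single-precision approximation before use.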
//
inline void CCodeGeneratorPSP::UpdateSimDoubleRegister( EN64FloatReg n64_reg )
{
mRegisterCache.MarkFPAsValid( n64_reg, true );
mRegisterCache.MarkFPAsDirty( n64_reg, true );
mRegisterCache.MarkFPAsSim( n64_reg, true );
mRegisterCache.MarkFPAsValid( EN64FloatReg(n64_reg + 1), true );
mRegisterCache.MarkFPAsDirty( EN64FloatReg(n64_reg + 1), true );
}
//
const CN64RegisterCachePSP & CCodeGeneratorPSP::GetRegisterCacheFromHandle( RegisterSnapshotHandle snapshot ) const
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( snapshot.Handle < mRegisterSnapshots.size(), "Invalid snapshot handle" );
#endif
return mRegisterSnapshots[ snapshot.Handle ];
}
// Flush a specific register back to memory if dirty.
// Clears the dirty flag and invalidates the contents if specified
void CCodeGeneratorPSP::FlushRegister( CN64RegisterCachePSP & cache, EN64Reg n64_reg, u32 lo_hi_idx, bool invalidate )
{
if( cache.IsDirty( n64_reg, lo_hi_idx ) )
{
if( cache.IsKnownValue( n64_reg, lo_hi_idx ) )
{
s32 known_value( cache.GetKnownValue( n64_reg, lo_hi_idx )._s32 );
SetVar( lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0, known_value );
}
else if( cache.IsCached( n64_reg, lo_hi_idx ) )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( cache.IsValid( n64_reg, lo_hi_idx ), "Register is dirty but not valid?" );
#endif
EPspReg cached_reg( cache.GetCachedReg( n64_reg, lo_hi_idx ) );
SetVar( lo_hi_idx ? &gGPR[ n64_reg ]._u32_1 : &gGPR[ n64_reg ]._u32_0, cached_reg );
}
#ifdef DAEDALUS_DEBUG_CONSOLE
else
{
DAEDALUS_ERROR( "Register is dirty, but not known or cached" );
}
#endif
// We're no longer dirty
cache.MarkAsDirty( n64_reg, lo_hi_idx, false );
}
// Invalidate the register, so we pick up any values the function might have changed
if( invalidate )
{
cache.ClearKnownValue( n64_reg, lo_hi_idx );
if( cache.IsCached( n64_reg, lo_hi_idx ) )
{
cache.MarkAsValid( n64_reg, lo_hi_idx, false );
}
}
}
// This function flushes all dirty registers back to memory
// If the invalidate flag is set this also invalidates the known value/cached
// register. This is primarily to ensure that we keep the register set
// in a consistent set across calls to generic functions. Ideally we need
// to reimplement generic functions with specialised code to avoid the flush.
void CCodeGeneratorPSP::FlushAllRegisters( CN64RegisterCachePSP & cache, bool invalidate )
{
mFloatCMPIsValid = false; //invalidate float compare register
mMultIsValid = false; //Mult hi/lo are invalid
// Skip r0
for( u32 i = 1; i < NUM_N64_REGS; i++ )
{
EN64Reg n64_reg = EN64Reg( i );
FlushRegister( cache, n64_reg, 0, invalidate );
FlushRegister( cache, n64_reg, 1, invalidate );
}
FlushAllFloatingPointRegisters( cache, invalidate );
}
// Floating-point arguments are placed in $f12-$f15.
// Floating-point return values go into $f0-$f1.
// $f0-$f19 are caller-saved.
// $f20-$f31 are callee-saved.
void CCodeGeneratorPSP::FlushAllFloatingPointRegisters( CN64RegisterCachePSP & cache, bool invalidate )
{
for( u32 i {0}; i < NUM_N64_FP_REGS; i++ )
{
EN64FloatReg n64_reg = EN64FloatReg( i );
if( cache.IsFPDirty( n64_reg ) )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( cache.IsFPValid( n64_reg ), "Register is dirty but not valid?" );
#endif
EPspFloatReg psp_reg = EPspFloatReg( n64_reg );
SetFloatVar( &gCPUState.FPU[n64_reg]._f32, psp_reg );
cache.MarkFPAsDirty( n64_reg, false );
}
if( invalidate )
{
// Invalidate the register, so we pick up any values the function might have changed
cache.MarkFPAsValid( n64_reg, false );
cache.MarkFPAsSim( n64_reg, false );
}
}
}
// This function flushes all dirty *temporary* registers back to memory, and
// marks them as invalid.
void CCodeGeneratorPSP::FlushAllTemporaryRegisters( CN64RegisterCachePSP & cache, bool invalidate )
{
// Skip r0
for( u32 i {1}; i < NUM_N64_REGS; i++ )
{
EN64Reg n64_reg = EN64Reg( i );
if( cache.IsTemporary( n64_reg, 0 ) )
{
FlushRegister( cache, n64_reg, 0, invalidate );
}
if( cache.IsTemporary( n64_reg, 1 ) )
{
FlushRegister( cache, n64_reg, 1, invalidate );
}
}
// XXXX some fp regs are preserved across function calls?
FlushAllFloatingPointRegisters( cache, invalidate );
}
//
void CCodeGeneratorPSP::RestoreAllRegisters( CN64RegisterCachePSP & current_cache, CN64RegisterCachePSP & new_cache )
{
// Skip r0
for( u32 i {1}; i < NUM_N64_REGS; i++ )
{
EN64Reg n64_reg = EN64Reg( i );
if( new_cache.IsValid( n64_reg, 0 ) && !current_cache.IsValid( n64_reg, 0 ) )
{
GetVar( new_cache.GetCachedReg( n64_reg, 0 ), &gGPR[ n64_reg ]._u32_0 );
}
if( new_cache.IsValid( n64_reg, 1 ) && !current_cache.IsValid( n64_reg, 1 ) )
{
GetVar( new_cache.GetCachedReg( n64_reg, 1 ), &gGPR[ n64_reg ]._u32_1 );
}
}
// XXXX some fp regs are preserved across function calls?
for( u32 i {0}; i < NUM_N64_FP_REGS; ++i )
{
EN64FloatReg n64_reg = EN64FloatReg( i );
if( new_cache.IsFPValid( n64_reg ) && !current_cache.IsFPValid( n64_reg ) )
{
EPspFloatReg psp_reg = EPspFloatReg( n64_reg );
GetFloatVar( psp_reg, &gCPUState.FPU[n64_reg]._f32 );
}
}
}
//
CJumpLocation CCodeGeneratorPSP::GenerateExitCode( u32 exit_address, u32 jump_address, u32 num_instructions, CCodeLabel next_fragment )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
//DAEDALUS_ASSERT( exit_address != u32( ~0 ), "Invalid exit address" );
DAEDALUS_ASSERT( !next_fragment.IsSet() || jump_address == 0, "Shouldn't be specifying a jump address if we have a next fragment?" );
#endif
if( (exit_address == mEntryAddress) & mLoopTop.IsSet() )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( mUseFixedRegisterAllocation, "Have mLoopTop but unfixed register allocation?" );
#endif
FlushAllFloatingPointRegisters( mRegisterCache, false );
// Check if we're ok to continue, without flushing any registers
GetVar( PspReg_V0, &gCPUState.CPUControl[C0_COUNT]._u32 );
GetVar( PspReg_A0, (const u32*)&gCPUState.Events[0].mCount );
//
// Pull in any registers which may have been flushed for whatever reason.
//
// Skip r0
for( u32 i {1}; i < NUM_N64_REGS; i++ )
{
EN64Reg n64_reg = EN64Reg( i );
//if( mRegisterCache.IsDirty( n64_reg, 0 ) & mRegisterCache.IsKnownValue( n64_reg, 0 ) )
//{
// FlushRegister( mRegisterCache, n64_reg, 0, false );
//}
//if( mRegisterCache.IsDirty( n64_reg, 1 ) & mRegisterCache.IsKnownValue( n64_reg, 1 ) )
//{
// FlushRegister( mRegisterCache, n64_reg, 1, false );
//}
PrepareCachedRegister( n64_reg, 0 );
PrepareCachedRegister( n64_reg, 1 );
}
// Assuming we don't need to set CurrentPC/Delay flags before we branch to the top..
//
ADDIU( PspReg_V0, PspReg_V0, num_instructions );
SetVar( &gCPUState.CPUControl[C0_COUNT]._u32, PspReg_V0 );
//
// If the event counter is still positive, just jump directly to the top of our loop
//
ADDIU( PspReg_A0, PspReg_A0, -s16(num_instructions) );
BGTZ( PspReg_A0, mLoopTop, false );
SetVar( (u32*)&gCPUState.Events[0].mCount, PspReg_A0 ); // ASSUMES store is done in just a single op.
FlushAllRegisters( mRegisterCache, true );
SetVar( &gCPUState.CurrentPC, exit_address );
JAL( CCodeLabel( reinterpret_cast< const void * >( CPU_HANDLE_COUNT_INTERRUPT ) ), false );
SetVar( &gCPUState.Delay, NO_DELAY ); // ASSUMES store is done in just a single op.
J( CCodeLabel( reinterpret_cast< const void * >( _ReturnFromDynaRec ) ), true );
//
// Return an invalid jump location to indicate we've handled our own linking.
//
return CJumpLocation( nullptr );
}
FlushAllRegisters( mRegisterCache, true );
CCodeLabel exit_routine;
// Use the branch delay slot to load the second argument
PspOpCode op1;
PspOpCode op2;
if( jump_address != 0 )
{
LoadConstant( PspReg_A0, num_instructions );
LoadConstant( PspReg_A1, exit_address );
GetLoadConstantOps( PspReg_A2, jump_address, &op1, &op2 );
exit_routine = CCodeLabel( reinterpret_cast< const void * >( _DirectExitCheckDelay ) );
}
else
{
LoadConstant( PspReg_A0, num_instructions );
GetLoadConstantOps( PspReg_A1, exit_address, &op1, &op2 );
exit_routine = CCodeLabel( reinterpret_cast< const void * >( _DirectExitCheckNoDelay ) );
}
if( op2._u32 == 0 )
{
JAL( exit_routine, false ); // No branch delay
AppendOp( op1 );
}
else
{
AppendOp( op1 );
JAL( exit_routine, false ); // No branch delay
AppendOp( op2 );
}
// This jump may be nullptr, in which case we patch it below
// This gets patched with a jump to the next fragment if the target is later found
CJumpLocation jump_to_next_fragment( J( next_fragment, true ) );
// Patch up the exit jump if the target hasn't been compiled yet
if( !next_fragment.IsSet() )
{
PatchJumpLong( jump_to_next_fragment, CCodeLabel( reinterpret_cast< const void * >( _ReturnFromDynaRec ) ) );
}
return jump_to_next_fragment;
}
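// NB: GetLoadConstantOps splits a 32-bit constant into at most two PSP ops.
// When the constant fits in one op (op2 == 0), the JAL/J is emitted first and
// the single op goes in its branch delay slot; otherwise op1 is emitted before
// the branch and op2 fills the delay slot. Either way the argument is fully
// loaded before the callee runs, and the delay slot never goes to waste.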
// Handle branching back to the interpreter after an ERET
void CCodeGeneratorPSP::GenerateEretExitCode( u32 num_instructions, CIndirectExitMap * p_map )
{
FlushAllRegisters( mRegisterCache, false );
// Eret is a bit bodged so we exit at PC + 4
LoadConstant( PspReg_A0, num_instructions );
LoadConstant( PspReg_A1, reinterpret_cast< s32 >( p_map ) );
GetVar( PspReg_A2, &gCPUState.CurrentPC );
J( CCodeLabel( reinterpret_cast< const void * >( _IndirectExitCheck ) ), false );
ADDIU( PspReg_A2, PspReg_A2, 4 ); // Delay slot
}
// Handle branching back to the interpreter after an indirect jump
void CCodeGeneratorPSP::GenerateIndirectExitCode( u32 num_instructions, CIndirectExitMap * p_map )
{
FlushAllRegisters( mRegisterCache, false );
// New return address is in gCPUState.TargetPC
PspOpCode op1, op2;
GetLoadConstantOps(PspReg_A0, num_instructions, &op1, &op2);
LoadConstant( PspReg_A1, reinterpret_cast< s32 >( p_map ) );
GetVar( PspReg_A2, &gCPUState.TargetPC );
if (op2._u32 == 0)
{
J( CCodeLabel( reinterpret_cast< const void * >( _IndirectExitCheck ) ), false );
AppendOp(op1);
}
else
{
AppendOp(op1);
J( CCodeLabel( reinterpret_cast< const void * >( _IndirectExitCheck ) ), false );
AppendOp(op2);
}
}
//
void CCodeGeneratorPSP::GenerateBranchHandler( CJumpLocation branch_handler_jump, RegisterSnapshotHandle snapshot )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( branch_handler_jump.IsSet(), "Why is the branch handler jump not set?" );
#endif
CCodeGeneratorPSP::SetBufferA();
CCodeLabel current_label( GetAssemblyBuffer()->GetLabel() );
PatchJumpLong( branch_handler_jump, current_label );
mRegisterCache = GetRegisterCacheFromHandle( snapshot );
CJumpLocation jump_to_b( J( CCodeLabel( nullptr ), true ) );
CCodeGeneratorPSP::SetBufferB();
current_label = GetAssemblyBuffer()->GetLabel();
PatchJumpLong( jump_to_b, current_label );
}
//
CJumpLocation CCodeGeneratorPSP::GenerateBranchAlways( CCodeLabel target )
{
return J( target, true );
}
//
CJumpLocation CCodeGeneratorPSP::GenerateBranchIfSet( const u32 * p_var, CCodeLabel target )
{
GetVar( PspReg_V0, p_var );
return BNE( PspReg_V0, PspReg_R0, target, true );
}
//
CJumpLocation CCodeGeneratorPSP::GenerateBranchIfNotSet( const u32 * p_var, CCodeLabel target )
{
GetVar( PspReg_V0, p_var );
return BEQ( PspReg_V0, PspReg_R0, target, true );
}
//
CJumpLocation CCodeGeneratorPSP::GenerateBranchIfEqual( const u32 * p_var, u32 value, CCodeLabel target )
{
GetVar( PspReg_V0, p_var );
LoadConstant( PspReg_A0, value );
return BEQ( PspReg_V0, PspReg_A0, target, true );
}
//
CJumpLocation CCodeGeneratorPSP::GenerateBranchIfNotEqual( const u32 * p_var, u32 value, CCodeLabel target )
{
GetVar( PspReg_V0, p_var );
LoadConstant( PspReg_A0, value );
return BNE( PspReg_V0, PspReg_A0, target, true );
}
//
CJumpLocation CCodeGeneratorPSP::GenerateBranchIfNotEqual( EPspReg reg_a, u32 value, CCodeLabel target )
{
EPspReg reg_b( reg_a == PspReg_V0 ? PspReg_A0 : PspReg_V0 ); // Make sure we don't use the same reg
LoadConstant( reg_b, value );
return BNE( reg_a, reg_b, target, true );
}
//
void CCodeGeneratorPSP::SetVar( u32 * p_var, EPspReg reg_src )
{
//DAEDALUS_ASSERT( reg_src != PspReg_AT, "Whoops, splattering source register" );
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_var, &reg_base, &base_offset );
SW( reg_src, reg_base, base_offset );
}
//
void CCodeGeneratorPSP::SetVar( u32 * p_var, u32 value )
{
EPspReg reg_value;
if(value == 0)
{
reg_value = PspReg_R0;
}
else
{
LoadConstant( PspReg_V0, value );
reg_value = PspReg_V0;
}
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_var, &reg_base, &base_offset );
SW( reg_value, reg_base, base_offset );
}
//
void CCodeGeneratorPSP::SetFloatVar( f32 * p_var, EPspFloatReg reg_src )
{
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_var, &reg_base, &base_offset );
SWC1( reg_src, reg_base, base_offset );
}
//
void CCodeGeneratorPSP::GetVar( EPspReg dst_reg, const u32 * p_var )
{
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_var, &reg_base, &base_offset );
LW( dst_reg, reg_base, base_offset );
}
//
void CCodeGeneratorPSP::GetFloatVar( EPspFloatReg dst_reg, const f32 * p_var )
{
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_var, &reg_base, &base_offset );
LWC1( dst_reg, reg_base, base_offset );
}
//
void CCodeGeneratorPSP::GetBaseRegisterAndOffset( const void * p_address, EPspReg * p_reg, s16 * p_offset )
{
s32 base_pointer_offset( reinterpret_cast< const u8 * >( p_address ) - mpBasePointer );
if( (base_pointer_offset > SHRT_MIN) & (base_pointer_offset < SHRT_MAX) )
{
*p_reg = mBaseRegister;
*p_offset = base_pointer_offset;
}
else
{
u32 address( reinterpret_cast< u32 >( p_address ) );
u16 hi_bits( (u16)( address >> 16 ) );
u16 lo_bits( (u16)( address ) );
//
// Move up
//
/*if(lo_bits >= 0x8000)
{
hi_bits++;
}*/
hi_bits += lo_bits >> 15;
s32 long_offset( address - ((s32)hi_bits<<16) );
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( long_offset >= SHRT_MIN && long_offset <= SHRT_MAX, "Offset is out of range!" );
#endif
s16 offset( (s16)long_offset );
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( ((s32)hi_bits<<16) + offset == (s32)address, "Incorrect address calculation" );
#endif
LUI( PspReg_A0, hi_bits );
*p_reg = PspReg_A0;
*p_offset = offset;
}
}
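// Worked example for the long-offset path: p_address == 0x08918000 gives
// lo_bits == 0x8000, so hi_bits is bumped from 0x0891 to 0x0892; the code then
// emits LUI A0, 0x0892 (A0 = 0x08920000) and uses offset -0x8000, since
// 0x08920000 - 0x8000 == 0x08918000 still fits the signed 16-bit immediate.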
//
/*void CCodeGeneratorPSP::UpdateAddressAndDelay( u32 address, bool set_branch_delay )
{
DAEDALUS_ASSERT( !set_branch_delay || (address != 0), "Have branch delay and no address?" );
if( set_branch_delay )
{
SetVar( &gCPUState.Delay, EXEC_DELAY );
mBranchDelaySet = true;
}
//else
//{
// SetVar( &gCPUState.Delay, NO_DELAY );
//}
if( address != 0 )
{
SetVar( &gCPUState.CurrentPC, address );
}
}*/
// Generates instruction handler for the specified op code.
// Returns a jump location if an exception handler is required
CJumpLocation CCodeGeneratorPSP::GenerateOpCode( const STraceEntry& ti, bool branch_delay_slot, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_PROFILE
DAEDALUS_PROFILE( "CCodeGeneratorPSP::GenerateOpCode" );
#endif
u32 address = ti.Address;
OpCode op_code = ti.OpCode;
bool handled( false );
bool is_nop( op_code._u32 == 0 );
if( is_nop )
{
if( branch_delay_slot )
{
SetVar( &gCPUState.Delay, NO_DELAY );
}
return CJumpLocation();
}
if( branch_delay_slot ) mPreviousStoreBase = mPreviousLoadBase = N64Reg_R0; //Invalidate
mQuickLoad = ti.Usage.Access8000;
const EN64Reg rs = EN64Reg( op_code.rs );
const EN64Reg rt = EN64Reg( op_code.rt );
const EN64Reg rd = EN64Reg( op_code.rd );
const EN64Reg base = EN64Reg( op_code.base );
const u32 ft = op_code.ft;
const u32 sa = op_code.sa;
//const u32 jump_target( (address&0xF0000000) | (op_code.target<<2) );
//const u32 branch_target( address + ( ((s32)(s16)op_code.immediate)<<2 ) + 4);
//
// Look for opcodes we can handle manually
//
switch( op_code.op )
{
case OP_SPECOP:
switch( op_code.spec_op )
{
case SpecOp_SLL: GenerateSLL( rd, rt, sa ); handled = true; break;
case SpecOp_SRA: GenerateSRA( rd, rt, sa ); handled = true; break;
case SpecOp_SRL: GenerateSRL( rd, rt, sa ); handled = true; break;
case SpecOp_SLLV: GenerateSLLV( rd, rs, rt ); handled = true; break;
case SpecOp_SRAV: GenerateSRAV( rd, rs, rt ); handled = true; break;
case SpecOp_SRLV: GenerateSRLV( rd, rs, rt ); handled = true; break;
case SpecOp_JR: GenerateJR( rs, p_branch, p_branch_jump ); handled = true; break;
case SpecOp_JALR: GenerateJALR( rs, rd, address, p_branch, p_branch_jump ); handled = true; break;
case SpecOp_MFLO: GenerateMFLO( rd ); handled = true; break;
case SpecOp_MFHI: GenerateMFHI( rd ); handled = true; break;
case SpecOp_MTLO: GenerateMTLO( rd ); handled = true; break;
case SpecOp_MTHI: GenerateMTHI( rd ); handled = true; break;
case SpecOp_MULT: GenerateMULT( rs, rt ); handled = true; break;
case SpecOp_MULTU: GenerateMULTU( rs, rt ); handled = true; break;
case SpecOp_DIV: GenerateDIV( rs, rt ); handled = true; break;
case SpecOp_DIVU: GenerateDIVU( rs, rt ); handled = true; break;
case SpecOp_DMULT: GenerateDMULT( rs, rt ); handled = true; break;
case SpecOp_DMULTU: GenerateDMULTU( rs, rt ); handled = true; break;
case SpecOp_DDIVU: GenerateDDIVU( rs, rt ); handled = true; break;
case SpecOp_DDIV: GenerateDDIV( rs, rt ); handled = true; break;
case SpecOp_ADD: GenerateADDU( rd, rs, rt ); handled = true; break;
case SpecOp_ADDU: GenerateADDU( rd, rs, rt ); handled = true; break;
case SpecOp_SUB: GenerateSUBU( rd, rs, rt ); handled = true; break;
case SpecOp_SUBU: GenerateSUBU( rd, rs, rt ); handled = true; break;
case SpecOp_AND: GenerateAND( rd, rs, rt ); handled = true; break;
case SpecOp_OR: GenerateOR( rd, rs, rt ); handled = true; break;
case SpecOp_XOR: GenerateXOR( rd, rs, rt ); handled = true; break;
case SpecOp_NOR: GenerateNOR( rd, rs, rt ); handled = true; break;
case SpecOp_SLT: GenerateSLT( rd, rs, rt ); handled = true; break;
case SpecOp_SLTU: GenerateSLTU( rd, rs, rt ); handled = true; break;
case SpecOp_DADD: GenerateDADDU( rd, rs, rt ); handled = true; break;
case SpecOp_DADDU: GenerateDADDU( rd, rs, rt ); handled = true; break;
case SpecOp_DSRA32: GenerateDSRA32( rd, rt, sa ); handled = true; break;
case SpecOp_DSRA: GenerateDSRA( rd, rt, sa ); handled = true; break;
case SpecOp_DSLL32: GenerateDSLL32( rd, rt, sa ); handled = true; break;
case SpecOp_DSLL: GenerateDSLL( rd, rt, sa ); handled = true; break;
default:
break;
}
break;
case OP_REGIMM:
switch( op_code.regimm_op )
{
// These can be handled by the same Generate function, as the 'likely' bit is handled elsewhere
case RegImmOp_BLTZ:
case RegImmOp_BLTZL:
GenerateBLTZ( rs, p_branch, p_branch_jump ); handled = true; break;
case RegImmOp_BGEZ:
case RegImmOp_BGEZL:
GenerateBGEZ( rs, p_branch, p_branch_jump ); handled = true; break;
}
break;
case OP_J: /* nothing to do */ handled = true; break;
case OP_JAL: GenerateJAL( address ); handled = true; break;
case OP_BEQ: GenerateBEQ( rs, rt, p_branch, p_branch_jump ); handled = true; break;
case OP_BEQL: GenerateBEQ( rs, rt, p_branch, p_branch_jump ); handled = true; break;
case OP_BNE: GenerateBNE( rs, rt, p_branch, p_branch_jump ); handled = true; break;
case OP_BNEL: GenerateBNE( rs, rt, p_branch, p_branch_jump ); handled = true; break;
case OP_BLEZ: GenerateBLEZ( rs, p_branch, p_branch_jump ); handled = true; break;
case OP_BLEZL: GenerateBLEZ( rs, p_branch, p_branch_jump ); handled = true; break;
case OP_BGTZ: GenerateBGTZ( rs, p_branch, p_branch_jump ); handled = true; break;
case OP_BGTZL: GenerateBGTZ( rs, p_branch, p_branch_jump ); handled = true; break;
case OP_ADDI: GenerateADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break;
case OP_ADDIU: GenerateADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break;
case OP_SLTI: GenerateSLTI( rt, rs, s16( op_code.immediate ) ); handled = true; break;
case OP_SLTIU: GenerateSLTIU( rt, rs, s16( op_code.immediate ) ); handled = true; break;
case OP_DADDI: GenerateDADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break;
case OP_DADDIU: GenerateDADDIU( rt, rs, s16( op_code.immediate ) ); handled = true; break;
case OP_ANDI: GenerateANDI( rt, rs, op_code.immediate ); handled = true; break;
case OP_ORI: GenerateORI( rt, rs, op_code.immediate ); handled = true; break;
case OP_XORI: GenerateXORI( rt, rs, op_code.immediate ); handled = true; break;
case OP_LUI: GenerateLUI( rt, s16( op_code.immediate ) ); handled = true; break;
case OP_LB: GenerateLB( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LBU: GenerateLBU( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LH: GenerateLH( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LHU: GenerateLHU( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LW: GenerateLW( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LD: GenerateLD( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LWC1: GenerateLWC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; break;
#ifdef ENABLE_LDC1
case OP_LDC1: if( !branch_delay_slot & gMemoryAccessOptimisation & mQuickLoad ) { GenerateLDC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; } break;
#endif
#ifdef ENABLE_LWR_LWL
case OP_LWL: GenerateLWL( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_LWR: GenerateLWR( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
#endif
case OP_SB: GenerateSB( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_SH: GenerateSH( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_SW: GenerateSW( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_SD: GenerateSD( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_SWC1: GenerateSWC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; break;
#ifdef ENABLE_SDC1
case OP_SDC1: if( !branch_delay_slot & gMemoryAccessOptimisation & mQuickLoad ) { GenerateSDC1( address, branch_delay_slot, ft, base, s16( op_code.immediate ) ); handled = true; } break;
#endif
#ifdef ENABLE_SWR_SWL
case OP_SWL: GenerateSWL( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
case OP_SWR: GenerateSWR( address, branch_delay_slot, rt, base, s16( op_code.immediate ) ); handled = true; break;
#endif
case OP_CACHE: GenerateCACHE( base, op_code.immediate, rt ); handled = true; break;
case OP_COPRO0:
switch( op_code.cop0_op )
{
case Cop0Op_MFC0: GenerateMFC0( rt, op_code.fs ); handled = true; break;
//case Cop0Op_MTC0: GenerateMTC0( rt, op_code.fs ); handled = true; break; //1080 deg has issues with this
default:
break;
}
break;
case OP_COPRO1:
switch( op_code.cop1_op )
{
case Cop1Op_MFC1: GenerateMFC1( rt, op_code.fs ); handled = true; break;
case Cop1Op_MTC1: GenerateMTC1( op_code.fs, rt ); handled = true; break;
case Cop1Op_CFC1: GenerateCFC1( rt, op_code.fs ); handled = true; break;
case Cop1Op_CTC1: GenerateCTC1( op_code.fs, rt ); handled = true; break;
case Cop1Op_DInstr:
if( gDynarecDoublesOptimisation )
{
switch( op_code.cop1_funct )
{
case Cop1OpFunc_ADD: GenerateADD_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_SUB: GenerateSUB_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_MUL: GenerateMUL_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_DIV: GenerateDIV_D_Sim( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_SQRT: GenerateSQRT_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_ABS: GenerateABS_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_MOV: GenerateMOV_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_NEG: GenerateNEG_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_TRUNC_W: GenerateTRUNC_W_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_CVT_W: GenerateCVT_W_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_CVT_S: GenerateCVT_S_D_Sim( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_CMP_F: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_F, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_UN: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_UN, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_EQ: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_EQ, op_code.ft ); handled = true; break; //Conker has issues with this
case Cop1OpFunc_CMP_UEQ: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_UEQ, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_OLT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_OLT, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_ULT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_ULT, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_OLE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_OLE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_ULE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_ULE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_SF: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_SF, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGLE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGLE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_SEQ: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_SEQ, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGL: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGL, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_LT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_LT, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_LE: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_LE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGT: GenerateCMP_D_Sim( op_code.fs, Cop1OpFunc_CMP_NGT, op_code.ft ); handled = true; break;
default:
break; //call Generic4300
}
}
else
{
GenerateGenericR4300( op_code, R4300_GetDInstructionHandler( op_code ) ); handled = true; break; // Need branch delay?
}
break;
case Cop1Op_SInstr:
switch( op_code.cop1_funct )
{
case Cop1OpFunc_ADD: GenerateADD_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_SUB: GenerateSUB_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_MUL: GenerateMUL_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_DIV: GenerateDIV_S( op_code.fd, op_code.fs, op_code.ft ); handled = true; break;
case Cop1OpFunc_SQRT: GenerateSQRT_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_ABS: GenerateABS_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_MOV: GenerateMOV_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_NEG: GenerateNEG_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_TRUNC_W: GenerateTRUNC_W_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_FLOOR_W: GenerateFLOOR_W_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_CVT_W: GenerateCVT_W_S( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_CVT_D: if( gDynarecDoublesOptimisation & !g_ROM.DISABLE_SIM_CVT_D_S ) GenerateCVT_D_S_Sim( op_code.fd, op_code.fs ); //Sim has issues with EWJ/Tom&Jerry/PowerPuffGirls
else GenerateCVT_D_S( op_code.fd, op_code.fs );
handled = true;
break;
case Cop1OpFunc_CMP_F: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_F, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_UN: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_UN, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_EQ: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_EQ, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_UEQ: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_UEQ, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_OLT: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_OLT, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_ULT: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_ULT, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_OLE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_OLE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_ULE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_ULE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_SF: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_SF, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGLE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGLE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_SEQ: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_SEQ, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGL: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGL, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_LT: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_LT, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_NGE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGE, op_code.ft ); handled = true; break;
case Cop1OpFunc_CMP_LE: GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_LE, op_code.ft ); handled = true; break;
//This breaks DK64, but I think it is a bug somewhere else, since the interpreter throws an FP NaN exception in Cop1_S_NGT
case Cop1OpFunc_CMP_NGT: if(g_ROM.GameHacks != DK64) {GenerateCMP_S( op_code.fs, Cop1OpFunc_CMP_NGT, op_code.ft ); handled = true;} break;
}
break;
case Cop1Op_WInstr:
switch( op_code.cop1_funct )
{
case Cop1OpFunc_CVT_S: GenerateCVT_S_W( op_code.fd, op_code.fs ); handled = true; break;
case Cop1OpFunc_CVT_D: if( gDynarecDoublesOptimisation ) { GenerateCVT_D_W_Sim( op_code.fd, op_code.fs ); handled = true; } break;
}
break;
case Cop1Op_BCInstr:
switch( op_code.cop1_bc )
{
// These can be handled by the same Generate function, as the 'likely' bit is handled elsewhere
case Cop1BCOp_BC1F:
case Cop1BCOp_BC1FL:
GenerateBC1F( p_branch, p_branch_jump ); handled = true; break;
case Cop1BCOp_BC1T:
case Cop1BCOp_BC1TL:
GenerateBC1T( p_branch, p_branch_jump ); handled = true; break;
}
break;
default:
break;
}
break;
}
//Invalidate the load/store base regs if they have been modified by the current opcode //Corn
if( (ti.Usage.RegWrites >> mPreviousLoadBase) & 1 ) mPreviousLoadBase = N64Reg_R0; //Invalidate
if( (ti.Usage.RegWrites >> mPreviousStoreBase) & 1 ) mPreviousStoreBase = N64Reg_R0; //Invalidate
// Default handling - call interpreting function
//
if( !handled )
{
#if 0
//1->Show not handled OP codes (Require that DAEDALUS_SILENT flag is undefined)
// Note: Cop1Op_DInstr are handled elsewhere!
char msg[128];
SprintOpCodeInfo( msg, address, op_code );
printf( "Unhandled: 0x%08x %s\n", address, msg );
#endif
bool BranchDelaySet = false;
if( R4300_InstructionHandlerNeedsPC( op_code ) )
{
//Generate exception handler
//
SetVar( &gCPUState.CurrentPC, address );
if( branch_delay_slot )
{
SetVar( &gCPUState.Delay, EXEC_DELAY );
BranchDelaySet = true;
}
GenerateGenericR4300( op_code, R4300_GetInstructionHandler( op_code ) );
#ifdef ALLOW_INTERPRETER_EXCEPTIONS
// Make sure all dirty registers are flushed. NB - we don't invalidate them
// to avoid reloading the contents if no exception was thrown.
FlushAllRegisters( mRegisterCache, false );
JAL( CCodeLabel( reinterpret_cast< const void * >( _ReturnFromDynaRecIfStuffToDo ) ), false );
ORI( PspReg_A0, PspReg_R0, 0 );
#endif
}
else
{
GenerateGenericR4300( op_code, R4300_GetInstructionHandler( op_code ) );
}
// Check whether we want to invert the status of this branch
if( p_branch != nullptr )
{
CCodeLabel no_target( nullptr );
//
// Check if the branch has been taken
//
if( p_branch->Direct )
{
if( p_branch->ConditionalBranchTaken )
{
*p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.Delay, DO_DELAY, no_target );
}
else
{
*p_branch_jump = GenerateBranchIfEqual( &gCPUState.Delay, DO_DELAY, no_target );
}
}
else
{
// XXXX eventually just exit here, and skip default exit code below
if( p_branch->Eret )
{
*p_branch_jump = GenerateBranchAlways( no_target );
}
else
{
*p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.TargetPC, p_branch->TargetAddress, no_target );
}
}
}
if( BranchDelaySet )
{
SetVar( &gCPUState.Delay, NO_DELAY );
}
}
return CJumpLocation();
}
//
// NB - This assumes that the address and branch delay have been set up before
// calling this function.
//
void CCodeGeneratorPSP::GenerateGenericR4300( OpCode op_code, CPU_Instruction p_instruction )
{
mPreviousLoadBase = N64Reg_R0; //Invalidate
mPreviousStoreBase = N64Reg_R0; //Invalidate
// Flush out the dirty registers, and mark them all as invalid
// Theoretically we could just flush the registers referenced in the call
FlushAllRegisters( mRegisterCache, true );
PspOpCode op1;
PspOpCode op2;
// Load the opcode in to A0
GetLoadConstantOps( PspReg_A0, op_code._u32, &op1, &op2 );
if( op2._u32 == 0 )
{
JAL( CCodeLabel( reinterpret_cast< const void * >( p_instruction ) ), false ); // No branch delay
AppendOp( op1 );
}
else
{
AppendOp( op1 );
JAL( CCodeLabel( reinterpret_cast< const void * >( p_instruction ) ), false ); // No branch delay
AppendOp( op2 );
}
}
//
CJumpLocation CCodeGeneratorPSP::ExecuteNativeFunction( CCodeLabel speed_hack, bool check_return )
{
JAL( speed_hack, true );
if( check_return )
return BEQ(PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true);
else
return CJumpLocation(nullptr);
}
//
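// When the N64 base register holds a known constant, the effective address can
// be computed at compile time and, if the memory-lookup table maps it to host
// memory, the load is emitted as a plain load against that host pointer -
// no runtime address check at all.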
inline bool CCodeGeneratorPSP::GenerateDirectLoad( EPspReg psp_dst, EN64Reg base, s16 offset, OpCodeValue load_op, u32 swizzle )
{
if(mRegisterCache.IsKnownValue( base, 0 ))
{
u32 base_address( mRegisterCache.GetKnownValue( base, 0 )._u32 );
u32 address( (base_address + s32( offset )) ^ swizzle );
if( load_op == OP_LWL )
{
load_op = OP_LW;
u32 shift = address & 3;
address ^= shift; //Zero low 2 bits in address
ADDIU( PspReg_A3, PspReg_R0, shift); //copy low 2 bits to A3
}
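// The merge code emitted by GenerateLWL expects the aligned word in the
// destination register and the byte offset of the unaligned address in A3.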
const MemFuncRead & m( g_MemoryLookupTableRead[ address >> 18 ] );
if( m.pRead )
{
void * p_memory( (void*)( m.pRead + address ) );
//printf( "Loading from %s %08x + %04x (%08x) op %d\n", RegNames[ base ], base_address, u16(offset), address, load_op );
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_memory, &reg_base, &base_offset );
CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, reg_base, base_offset );
return true;
}
else
{
//printf( "Loading from %s %08x + %04x (%08x) - unhandled\n", RegNames[ base ], base_address, u16(offset), address );
}
}
return false;
}
//
void CCodeGeneratorPSP::GenerateSlowLoad( u32 current_pc, EPspReg psp_dst, EPspReg reg_address, ReadMemoryFunction p_read_memory )
{
// We need to flush all the regular registers AND invalidate all the temp regs.
// NB: When we can flush registers through some kind of handle (i.e. when the
// exception handling branch is taken), this won't be necessary
CN64RegisterCachePSP current_regs( mRegisterCache );
FlushAllRegisters( mRegisterCache, false );
FlushAllTemporaryRegisters( mRegisterCache, true );
//
//
//
if( reg_address != PspReg_A0 )
{
OR( PspReg_A0, reg_address, PspReg_R0 );
}
PspOpCode op1, op2;
GetLoadConstantOps( PspReg_A1, current_pc, &op1, &op2 );
if( op2._u32 == 0 )
{
JAL( CCodeLabel( reinterpret_cast< const void * >( p_read_memory ) ), false );
AppendOp( op1 );
}
else
{
AppendOp( op1 );
JAL( CCodeLabel( reinterpret_cast< const void * >( p_read_memory ) ), false );
AppendOp( op2 );
}
// Restore all registers BEFORE copying back final value
RestoreAllRegisters( mRegisterCache, current_regs );
// Copy return value to the destination register if we're not using V0
if( psp_dst != PspReg_V0 )
{
OR( psp_dst, PspReg_V0, PspReg_R0 );
}
// Restore the state of the registers
mRegisterCache = current_regs;
}
//
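// Loads are generated in up to three tiers: a direct host-memory access when
// the base register's value is known (GenerateDirectLoad), an inline fast path
// that range-checks the address and adds gMemoryBaseReg, and a fall-back call
// into a read-handler routine (GenerateSlowLoad) for everything else.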
void CCodeGeneratorPSP::GenerateLoad( u32 current_pc,
EPspReg psp_dst,
EN64Reg n64_base,
s16 offset,
OpCodeValue load_op,
u32 swizzle,
ReadMemoryFunction p_read_memory )
{
//
// Check if the base pointer is a known value, in which case we can load directly.
//
if(GenerateDirectLoad( psp_dst, n64_base, offset, load_op, swizzle ))
{
return;
}
EPspReg reg_base( GetRegisterAndLoadLo( n64_base, PspReg_A0 ) );
EPspReg reg_address( reg_base );
if( load_op == OP_LWL ) //We handle both LWL & LWR here
{
if(offset != 0)
{
ADDIU( PspReg_A0, reg_address, offset ); //base + offset
reg_address = PspReg_A0;
offset = 0;
}
ANDI( PspReg_A3, reg_address, 3 ); //copy the two LSBs to A3, used later for the mask
XOR( PspReg_A0, reg_address, PspReg_A3); //zero the two LSBs of the address
//Don't cache the current pointer in the K0 reg because the address is mangled by the XOR
if( (n64_base == N64Reg_SP) | (gMemoryAccessOptimisation & mQuickLoad) )
{
ADDU( PspReg_A0, PspReg_A0, gMemoryBaseReg );
CAssemblyWriterPSP::LoadRegister( psp_dst, OP_LW, PspReg_A0, 0 );
return;
}
load_op = OP_LW;
reg_address = PspReg_A0;
}
else
{
if( (n64_base == N64Reg_SP) | (gMemoryAccessOptimisation & mQuickLoad) )
{
if( swizzle != 0 )
{
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
offset = 0;
}
XORI( PspReg_A0, reg_address, swizzle );
ADDU( PspReg_A0, PspReg_A0, gMemoryBaseReg );
CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_A0, offset );
return;
}
//Reuse the old base register on consecutive accesses from the same base register //Corn
if( n64_base == mPreviousLoadBase )
{
CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_K0, offset );
return;
}
else
{
ADDU( PspReg_K0, reg_address, gMemoryBaseReg );
CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_K0, offset );
mPreviousLoadBase = n64_base;
return;
}
}
}
if( swizzle != 0 )
{
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
offset = 0;
}
XORI( PspReg_A0, reg_address, swizzle );
reg_address = PspReg_A0;
}
//
// If the fast load failed, we need to perform a 'slow' load going through a handler routine
// We may already be performing this instruction in the second buffer, in which
// case we don't bother with the transitions
//
if( CAssemblyWriterPSP::IsBufferA() )
{
//
// This is a very quick check for 0x80000000 <= address < (0x80000000 + RAM size).
// If the address is in range, we add the base pointer (already offset by
// 0x80000000) and dereference; if the check fails, the conditional branch
// skips the inline load and jumps to the handler in the secondary buffer.
//
SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // V1 = address < upper bound
CJumpLocation branch( BEQ( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) ); // branch to jump to handler
ADDU( PspReg_A1, reg_address, gMemoryBaseReg );
CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_A1, offset );
CCodeLabel continue_location( GetAssemblyBuffer()->GetLabel() );
//
// Generate the handler code in the secondary buffer
//
CAssemblyWriterPSP::SetBufferB();
CCodeLabel handler_label( GetAssemblyBuffer()->GetLabel() );
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
}
#ifdef ENABLE_LWC1
if( load_op == OP_LWC1 )
{
GenerateSlowLoad( current_pc, PspReg_V0, reg_address, p_read_memory );
// Copy return value to the FPU destination register
MTC1( (EPspFloatReg)psp_dst, PspReg_V0 );
}
else
#endif
{
GenerateSlowLoad( current_pc, psp_dst, reg_address, p_read_memory );
}
CJumpLocation ret_handler( J( continue_location, true ) );
CAssemblyWriterPSP::SetBufferA();
//
// Keep track of the details we need to generate the jump to the distant handler function
//
mAddressCheckFixups.push_back( SAddressCheckFixup( branch, handler_label ) );
}
else
{
//
// This is a very quick check for 0x80000000 <= address < (0x80000000 + RAM size).
// If the address is in range, we add the base pointer (already offset by
// 0x80000000) and dereference (the branch-likely ensures that the load in its
// delay slot only executes when the condition holds).
//
SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // V1 = address < upper bound
ADDU( PspReg_A1, reg_address, gMemoryBaseReg );
CJumpLocation branch( BNEL( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) );
CAssemblyWriterPSP::LoadRegister( psp_dst, load_op, PspReg_A1, offset );
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
}
#ifdef ENABLE_LWC1
if( load_op == OP_LWC1 )
{
GenerateSlowLoad( current_pc, PspReg_V0, reg_address, p_read_memory );
// Copy return value to the FPU destination register
MTC1( (EPspFloatReg)psp_dst, PspReg_V0 );
}
else
#endif
{
GenerateSlowLoad( current_pc, psp_dst, reg_address, p_read_memory );
}
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
}
}
//
inline void CCodeGeneratorPSP::GenerateAddressCheckFixups()
{
for( u32 i {0}; i < mAddressCheckFixups.size(); ++i )
{
GenerateAddressCheckFixup( mAddressCheckFixups[ i ] );
}
}
//
inline void CCodeGeneratorPSP::GenerateAddressCheckFixup( const SAddressCheckFixup & fixup )
{
CAssemblyWriterPSP::SetBufferA();
//
// Fixup the branch we emitted so that it jumps to the handler routine.
// If possible, we try to branch directly.
// If that fails, we try to emit a stub function which jumps to the handler. Hopefully that's a bit closer
// If *that* fails, we have to revert to an unconditional jump to the handler routine. Expensive!!
//
/*
if( PatchJumpLong( fixup.BranchToJump, fixup.HandlerAddress ) )
{
printf( "Wow, managed to jump directly to our handler\n" );
}
else
{
if( PatchJumpLong( fixup.BranchToJump, GetAssemblyBuffer()->GetLabel() ) )
{
//
// Emit a long jump to the handler function
//
J( fixup.HandlerAddress, true );
}
else
{
printf( "Warning, jump is too far for address handler function\n" );
ReplaceBranchWithJump( fixup.BranchToJump, fixup.HandlerAddress );
}
}
*/
// Optimized version of the code above - Salvy
// The first case (fastest) never happens in practice.
// The second case (still fast) always happens (we have an assert otherwise).
// The third case (slowest) never happens and is thus ignored.
// TODO: Optimize further by eliminating the return value from PatchJumpLong
//
PatchJumpLong( fixup.BranchToJump, GetAssemblyBuffer()->GetLabel() );
//
// Emit a long jump to the handler function
//
J( fixup.HandlerAddress, true );
}
//
inline bool CCodeGeneratorPSP::GenerateDirectStore( EPspReg psp_src, EN64Reg base, s16 offset, OpCodeValue store_op, u32 swizzle )
{
if(mRegisterCache.IsKnownValue( base, 0 ))
{
u32 base_address( mRegisterCache.GetKnownValue( base, 0 )._u32 );
u32 address( (base_address + s32( offset )) ^ swizzle );
const MemFuncWrite & m( g_MemoryLookupTableWrite[ address >> 18 ] );
if( m.pWrite )
{
void * p_memory( (void*)( m.pWrite + address ) );
//printf( "Storing to %s %08x + %04x (%08x) op %d\n", RegNames[ base ], base_address, u16(offset), address, store_op );
EPspReg reg_base;
s16 base_offset;
GetBaseRegisterAndOffset( p_memory, &reg_base, &base_offset );
CAssemblyWriterPSP::StoreRegister( psp_src, store_op, reg_base, base_offset );
return true;
}
else
{
//printf( "Storing to %s %08x + %04x (%08x) - unhandled\n", RegNames[ base ], base_address, u16(offset), address );
}
}
return false;
}
//
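// Slow stores marshal their arguments into A0 (address), A1 (value) and
// A2 (current pc) and call out to the memory-write handler; as with slow
// loads, the cached registers are flushed around the call.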
inline void CCodeGeneratorPSP::GenerateSlowStore( u32 current_pc, EPspReg psp_src, EPspReg reg_address, WriteMemoryFunction p_write_memory )
{
// We need to flush all the regular registers AND invalidate all the temp regs.
// NB: When we can flush registers through some kind of handle (i.e. when the
// exception handling branch is taken), this won't be necessary
CN64RegisterCachePSP current_regs( mRegisterCache );
FlushAllRegisters( mRegisterCache, false );
FlushAllTemporaryRegisters( mRegisterCache, true );
if( reg_address != PspReg_A0 )
{
OR( PspReg_A0, reg_address, PspReg_R0 );
}
if( psp_src != PspReg_A1 )
{
OR( PspReg_A1, psp_src, PspReg_R0 );
}
PspOpCode op1, op2;
GetLoadConstantOps( PspReg_A2, current_pc, &op1, &op2 );
if( op2._u32 == 0 )
{
JAL( CCodeLabel( reinterpret_cast< const void * >( p_write_memory ) ), false ); // Don't set branch delay slot
AppendOp( op1 );
}
else
{
AppendOp( op1 );
JAL( CCodeLabel( reinterpret_cast< const void * >( p_write_memory ) ), false ); // Don't set branch delay slot
AppendOp( op2 );
}
// Restore all registers BEFORE copying back final value
RestoreAllRegisters( mRegisterCache, current_regs );
// Restore the state of the registers
mRegisterCache = current_regs;
}
//
void CCodeGeneratorPSP::GenerateStore( u32 current_pc,
EPspReg psp_src,
EN64Reg n64_base,
s32 offset,
OpCodeValue store_op,
u32 swizzle,
WriteMemoryFunction p_write_memory )
{
//
// Check if the base pointer is a known value, in which case we can store directly.
//
if(GenerateDirectStore( psp_src, n64_base, offset, store_op, swizzle ))
{
return;
}
EPspReg reg_base( GetRegisterAndLoadLo( n64_base, PspReg_A0 ) );
EPspReg reg_address( reg_base );
if( (n64_base == N64Reg_SP) | (gMemoryAccessOptimisation & mQuickLoad) )
{
if( swizzle != 0 )
{
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
offset = 0;
}
XORI( PspReg_A0, reg_address, swizzle );
ADDU( PspReg_A0, PspReg_A0, gMemoryBaseReg );
CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_A0, offset );
return;
}
//Reuse the old base register on consecutive accesses from the same base register //Corn
if( n64_base == mPreviousStoreBase )
{
CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_K1, offset );
return;
}
else
{
ADDU( PspReg_K1, reg_address, gMemoryBaseReg );
CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_K1, offset );
mPreviousStoreBase = n64_base;
return;
}
}
if( swizzle != 0 )
{
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
offset = 0;
}
XORI( PspReg_A0, reg_address, swizzle );
reg_address = PspReg_A0;
}
if( CAssemblyWriterPSP::IsBufferA() )
{
SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // V1 = address < upper bound
CJumpLocation branch( BEQ( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) );
ADDU( PspReg_V0, reg_address, gMemoryBaseReg );
CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_V0, offset );
CCodeLabel continue_location( GetAssemblyBuffer()->GetLabel() );
//
// Generate the handler code in the secondary buffer
//
CAssemblyWriterPSP::SetBufferB();
CCodeLabel handler_label( GetAssemblyBuffer()->GetLabel() );
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
}
#ifdef ENABLE_SWC1
if( store_op == OP_SWC1 )
{ //Move value from FPU to CPU
MFC1( PspReg_A1, (EPspFloatReg)psp_src );
psp_src = PspReg_A1;
}
#endif
GenerateSlowStore( current_pc, psp_src, reg_address, p_write_memory );
CJumpLocation ret_handler( J( continue_location, true ) );
CAssemblyWriterPSP::SetBufferA();
//
// Keep track of the details we need to generate the jump to the distant handler function
//
mAddressCheckFixups.push_back( SAddressCheckFixup( branch, handler_label ) );
}
else
{
SLT( PspReg_V1, reg_address, gMemUpperBoundReg ); // V1 = address < upper bound
ADDU( PspReg_V0, reg_address, gMemoryBaseReg );
CJumpLocation branch( BNEL( PspReg_V1, PspReg_R0, CCodeLabel( nullptr ), false ) );
CAssemblyWriterPSP::StoreRegister( psp_src, store_op, PspReg_V0, offset );
if( offset != 0 )
{
ADDIU( PspReg_A0, reg_address, offset );
reg_address = PspReg_A0;
}
#ifdef ENABLE_SWC1
if( store_op == OP_SWC1 )
{ //Move value from FPU to CPU
MFC1( PspReg_A1, (EPspFloatReg)psp_src );
psp_src = PspReg_A1;
}
#endif
GenerateSlowStore( current_pc, psp_src, reg_address, p_write_memory );
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
}
}
//
inline void CCodeGeneratorPSP::GenerateCACHE( EN64Reg base, s16 offset, u32 cache_op )
{
//u32 cache_op = op_code.rt;
//u32 address = (u32)( gGPR[op_code.base]._s32_0 + (s32)(s16)op_code.immediate );
u32 cache = cache_op & 0x3;
u32 action = (cache_op >> 2) & 0x7;
// For instruction cache invalidation, make sure we let the CPU know so the whole
// dynarec system can be invalidated
//if(!(cache_op & 0x1F))
if(cache == 0 && (action == 0 || action == 4))
{
FlushAllRegisters(mRegisterCache, true);
LoadRegister(PspReg_A0, base, 0);
ADDI(PspReg_A0, PspReg_A0, offset);
JAL( CCodeLabel( reinterpret_cast< const void * >( CPU_InvalidateICacheRange ) ), false );
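// A1 = length of the range to invalidate (0x20 bytes), loaded in the JAL's branch-delay slot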
ORI(PspReg_A1, PspReg_R0, 0x20);
}
//else
//{
// We don't care about data cache etc
//}
}
//
inline void CCodeGeneratorPSP::GenerateJAL( u32 address )
{
//gGPR[REG_ra]._s64 = (s64)(s32)(gCPUState.CurrentPC + 8); // Store return address
//u32 new_pc( (gCPUState.CurrentPC & 0xF0000000) | (op_code.target<<2) );
//CPU_TakeBranch( new_pc, CPU_BRANCH_DIRECT );
//EPspReg reg_lo( GetRegisterNoLoadLo( N64Reg_RA, PspReg_V0 ) );
//LoadConstant( reg_lo, address + 8 );
//UpdateRegister( N64Reg_RA, reg_lo, URO_HI_SIGN_EXTEND, PspReg_V0 );
SetRegister32s(N64Reg_RA, address + 8);
}
//
inline void CCodeGeneratorPSP::GenerateJR( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
//u32 new_pc( gGPR[ op_code.rs ]._u32_0 );
//CPU_TakeBranch( new_pc, CPU_BRANCH_INDIRECT );
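// The trace was built assuming this indirect jump goes to p_branch->TargetAddress;
// compare the live register against that prediction and branch out to the exit
// stub when it differs.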
EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
// Necessary? Could just directly compare reg_lo and constant p_branch->TargetAddress??
//SetVar( &gCPUState.TargetPC, reg_lo );
//SetVar( &gCPUState.Delay, DO_DELAY );
//*p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.TargetPC, p_branch->TargetAddress, CCodeLabel() );
SetVar( &gCPUState.TargetPC, reg_lo );
//SetVar( &gCPUState.Delay, DO_DELAY );
*p_branch_jump = GenerateBranchIfNotEqual( reg_lo, p_branch->TargetAddress, CCodeLabel() );
}
//
inline void CCodeGeneratorPSP::GenerateJALR( EN64Reg rs, EN64Reg rd, u32 address, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
// Jump And Link
//u32 new_pc( gGPR[ op_code.rs ]._u32_0 );
//gGPR[ op_code.rd ]._s64 = (s64)(s32)(gCPUState.CurrentPC + 8); // Store return address
//EPspReg savereg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
// Necessary? Could just directly compare reg_lo and constant p_branch->TargetAddress??
//SetVar( &gCPUState.TargetPC, reg_lo );
//SetVar( &gCPUState.Delay, DO_DELAY );
//*p_branch_jump = GenerateBranchIfNotEqual( &gCPUState.TargetPC, p_branch->TargetAddress, CCodeLabel() );
//LoadConstant( savereg_lo, address + 8 );
//UpdateRegister( rd, savereg_lo, URO_HI_SIGN_EXTEND, PspReg_V0 );
SetRegister32s(rd, address + 8);
EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
SetVar( &gCPUState.TargetPC, reg_lo );
*p_branch_jump = GenerateBranchIfNotEqual( reg_lo, p_branch->TargetAddress, CCodeLabel() );
}
//
inline void CCodeGeneratorPSP::GenerateMFLO( EN64Reg rd )
{
//gGPR[ op_code.rd ]._u64 = gCPUState.MultLo._u64;
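// If the host HI/LO pair still holds the result of the last emulated multiply
// or divide we can read LO directly; otherwise reload it from gCPUState.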
if( mMultIsValid )
{
EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_A0 ) );
MFLO( reg_lo );
UpdateRegister( rd, reg_lo, URO_HI_SIGN_EXTEND );
}
else
{
EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
GetVar( reg_lo, &gCPUState.MultLo._u32_0 );
StoreRegisterLo( rd, reg_lo );
EPspReg reg_hi( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
GetVar( reg_hi, &gCPUState.MultLo._u32_1 );
StoreRegisterHi( rd, reg_hi );
}
}
//
inline void CCodeGeneratorPSP::GenerateMFHI( EN64Reg rd )
{
//gGPR[ op_code.rd ]._u64 = gCPUState.MultHi._u64;
if( mMultIsValid )
{
EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
MFHI( reg_lo );
UpdateRegister( rd, reg_lo, URO_HI_SIGN_EXTEND );
}
else
{
EPspReg reg_lo( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
GetVar( reg_lo, &gCPUState.MultHi._u32_0 );
StoreRegisterLo( rd, reg_lo );
EPspReg reg_hi( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
GetVar( reg_hi, &gCPUState.MultHi._u32_1 );
StoreRegisterHi( rd, reg_hi );
}
}
//
inline void CCodeGeneratorPSP::GenerateMTLO( EN64Reg rs )
{
mMultIsValid = false;
//gCPUState.MultLo._u64 = gGPR[ op_code.rs ]._u64;
EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
SetVar( &gCPUState.MultLo._u32_0, reg_lo );
EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
SetVar( &gCPUState.MultLo._u32_1, reg_hi );
}
//
inline void CCodeGeneratorPSP::GenerateMTHI( EN64Reg rs )
{
mMultIsValid = false;
//gCPUState.MultHi._u64 = gGPR[ op_code.rs ]._u64;
EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
SetVar( &gCPUState.MultHi._u32_0, reg_lo );
EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
SetVar( &gCPUState.MultHi._u32_1, reg_hi );
}
//
inline void CCodeGeneratorPSP::GenerateMULT( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = true;
//s64 dwResult = (s64)gGPR[ op_code.rs ]._s32_0 * (s64)gGPR[ op_code.rt ]._s32_0;
//gCPUState.MultLo = (s64)(s32)(dwResult);
//gCPUState.MultHi = (s64)(s32)(dwResult >> 32);
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
MULT( reg_lo_a, reg_lo_b );
MFLO( PspReg_V0 );
MFHI( PspReg_A0 );
SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 );
#ifdef ENABLE_64BIT
SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend
SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend
SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateMULTU( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = true;
//u64 dwResult = (u64)gGPR[ op_code.rs ]._u32_0 * (u64)gGPR[ op_code.rt ]._u32_0;
//gCPUState.MultLo = (s64)(s32)(dwResult);
//gCPUState.MultHi = (s64)(s32)(dwResult >> 32);
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
MULTU( reg_lo_a, reg_lo_b );
MFLO( PspReg_V0 );
MFHI( PspReg_A0 );
SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 );
//Yoshi and DOOM64 must have sign extension //Corn
SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend
SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend
SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 );
}
//
inline void CCodeGeneratorPSP::GenerateDIV( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = true;
//s32 nDividend = gGPR[ op_code.rs ]._s32_0;
//s32 nDivisor = gGPR[ op_code.rt ]._s32_0;
//if (nDivisor)
//{
// gCPUState.MultLo._u64 = (s64)(s32)(nDividend / nDivisor);
// gCPUState.MultHi._u64 = (s64)(s32)(nDividend % nDivisor);
//}
#ifdef DIVZEROCHK
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
CJumpLocation branch( BEQ( reg_lo_rt, PspReg_R0, CCodeLabel(nullptr), true ) ); // Can use branch delay for something?
DIV( reg_lo_rs, reg_lo_rt );
MFLO( PspReg_V0 );
MFHI( PspReg_A0 );
SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 );
SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend
SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend
SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 );
// Branch here - really should trigger exception!
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
#else
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
DIV( reg_lo_rs, reg_lo_rt );
MFLO( PspReg_V0 );
MFHI( PspReg_A0 );
SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 );
#ifdef ENABLE_64BIT
SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend
SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend
SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 );
#endif
#endif
}
//
inline void CCodeGeneratorPSP::GenerateDIVU( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = true;
//u32 dwDividend = gGPR[ op_code.rs ]._u32_0;
//u32 dwDivisor = gGPR[ op_code.rt ]._u32_0;
//if (dwDivisor) {
// gCPUState.MultLo._u64 = (s64)(s32)(dwDividend / dwDivisor);
// gCPUState.MultHi._u64 = (s64)(s32)(dwDividend % dwDivisor);
//}
#ifdef DIVZEROCHK
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
CJumpLocation branch( BEQ( reg_lo_rt, PspReg_R0, CCodeLabel(nullptr), true ) ); // Can use branch delay for something?
DIVU( reg_lo_rs, reg_lo_rt );
MFLO( PspReg_V0 );
MFHI( PspReg_A0 );
SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 );
SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend
SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend
SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 );
// Branch here - really should trigger exception!
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
#else
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
DIVU( reg_lo_rs, reg_lo_rt );
MFLO( PspReg_V0 );
MFHI( PspReg_A0 );
SetVar( &gCPUState.MultLo._u32_0, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_0, PspReg_A0 );
#ifdef ENABLE_64BIT
SRA( PspReg_V0, PspReg_V0, 0x1f ); // Sign extend
SRA( PspReg_A0, PspReg_A0, 0x1f ); // Sign extend
SetVar( &gCPUState.MultLo._u32_1, PspReg_V0 );
SetVar( &gCPUState.MultHi._u32_1, PspReg_A0 );
#endif
#endif
}
//
inline void CCodeGeneratorPSP::GenerateADDU( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
if (mRegisterCache.IsKnownValue(rs, 0)
& mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, mRegisterCache.GetKnownValue(rs, 0)._s32
+ mRegisterCache.GetKnownValue(rt, 0)._s32);
return;
}
if( rs == N64Reg_R0 )
{
if(mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, mRegisterCache.GetKnownValue(rt, 0)._s32);
return;
}
// As RS is zero, the ADD is just a copy of RT to RD.
// Try to avoid loading into a temp register if the dest is cached
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
LoadRegisterLo( reg_lo_d, rt );
UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND );
}
else if( rt == N64Reg_R0 )
{
if(mRegisterCache.IsKnownValue(rs, 0))
{
SetRegister32s(rd, mRegisterCache.GetKnownValue(rs, 0)._s32);
return;
}
// As RT is zero, the ADD is just a copy of RS to RD.
// Try to avoid loading into a temp register if the dest is cached
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
LoadRegisterLo( reg_lo_d, rs );
UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND );
}
else
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rs ]._s32_0 + gGPR[ op_code.rt ]._s32_0 );
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
ADDU( reg_lo_d, reg_lo_a, reg_lo_b );
UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND );
}
}
//
inline void CCodeGeneratorPSP::GenerateSUBU( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
if (mRegisterCache.IsKnownValue(rs, 0)
& mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, mRegisterCache.GetKnownValue(rs, 0)._s32
- mRegisterCache.GetKnownValue(rt, 0)._s32);
return;
}
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rs ]._s32_0 - gGPR[ op_code.rt ]._s32_0 );
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SUBU( reg_lo_d, reg_lo_a, reg_lo_b );
UpdateRegister( rd, reg_lo_d, URO_HI_SIGN_EXTEND );
}
//64bit unsigned division //Corn
inline void CCodeGeneratorPSP::GenerateDDIVU( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = false;
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) );
if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a);
if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a);
if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b);
JAL( CCodeLabel( (void*)_DDIVU ), true );
}
//64bit signed division //Corn
inline void CCodeGeneratorPSP::GenerateDDIV( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = false;
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) );
if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a);
if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a);
if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b);
JAL( CCodeLabel( (void*)_DDIV ), true );
}
//64bit unsigned multiply //Corn
inline void CCodeGeneratorPSP::GenerateDMULTU( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = false;
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A3 ) );
if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a);
if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a);
if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b);
if( reg_hi_b != PspReg_A3 ) OR( PspReg_A3, PspReg_R0, reg_hi_b);
JAL( CCodeLabel( (void*)_DMULTU ), true );
}
//64bit signed multiply //Corn
inline void CCodeGeneratorPSP::GenerateDMULT( EN64Reg rs, EN64Reg rt )
{
mMultIsValid = false;
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A1 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A2 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A3 ) );
if( reg_lo_a != PspReg_A0 ) OR( PspReg_A0, PspReg_R0, reg_lo_a);
if( reg_hi_a != PspReg_A1 ) OR( PspReg_A1, PspReg_R0, reg_hi_a);
if( reg_lo_b != PspReg_A2 ) OR( PspReg_A2, PspReg_R0, reg_lo_b);
if( reg_hi_b != PspReg_A3 ) OR( PspReg_A3, PspReg_R0, reg_hi_b);
JAL( CCodeLabel( (void*)_DMULT ), true );
}
//
inline void CCodeGeneratorPSP::GenerateDADDIU( EN64Reg rt, EN64Reg rs, s16 immediate )
{
if( rs == N64Reg_R0 )
{
SetRegister32s( rt, immediate );
}
else
{
EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
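// 64-bit add: ADDIU the lo word, then use SLTU to detect unsigned wraparound
// (the sum is smaller than an operand iff it carried) and fold that carry into
// the hi word below. NB: only the carry is applied - the sign extension of a
// negative immediate into the hi word appears not to be handled here.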
if(reg_lo_d == reg_lo_a)
{
ADDIU( PspReg_A0, reg_lo_a, immediate );
SLTU( PspReg_A1, PspReg_A0, reg_lo_a ); // Overflowed?
OR( reg_lo_d, PspReg_A0, PspReg_R0 );
}
else
{
ADDIU( reg_lo_d, reg_lo_a, immediate );
SLTU( PspReg_A1, reg_lo_d, reg_lo_a ); // Overflowed?
}
StoreRegisterLo( rt, reg_lo_d );
EPspReg reg_hi_d( GetRegisterNoLoadHi( rt, PspReg_V0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A0 ) );
ADDU( reg_hi_d, PspReg_A1, reg_hi_a ); // Add on overflow
StoreRegisterHi( rt, reg_hi_d );
}
}
//
inline void CCodeGeneratorPSP::GenerateDADDU( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rt ]._u64 + gGPR[ op_code.rs ]._u64
//890c250: 00c41021 addu d_lo,a_lo,b_lo
//890c254: 0046402b sltu t0,d_lo,a_lo
//890c260: ad220000 sw d_lo,0(t1)
//890c258: 00e51821 addu d_hi,a_hi,b_hi
//890c25c: 01031821 addu d_hi,t0,d_hi
//890c268: ad230004 sw d_hi,4(t1)
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
ADDU( reg_lo_d, reg_lo_a, reg_lo_b );
//Assumes that at most one of the source regs is the same as the destination
SLTU( PspReg_V1, reg_lo_d, reg_lo_d == reg_lo_a ? reg_lo_b : reg_lo_a ); // Carry: the 32-bit sum wrapped iff it is less than an operand
StoreRegisterLo( rd, reg_lo_d );
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_A0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A1 ) );
ADDU( reg_hi_d, reg_hi_a, reg_hi_b );
ADDU( reg_hi_d, PspReg_V1, reg_hi_d ); // Add on overflow
StoreRegisterHi( rd, reg_hi_d );
}
//
inline void CCodeGeneratorPSP::GenerateAND( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rs ]._u64 & gGPR[ op_code.rt ]._u64;
bool HiIsDone = false;
if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1))
{
SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 & mRegisterCache.GetKnownValue(rt, 1)._u32 );
HiIsDone = true;
}
else if ((mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._u32 == 0)) |
(mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._u32 == 0)) )
{
SetRegister(rd, 1, 0 );
HiIsDone = true;
}
else if( mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._s32 == -1) )
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rs );
StoreRegisterHi( rd, reg_hi_d );
HiIsDone = true;
}
if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister(rd, 0, mRegisterCache.GetKnownValue(rs, 0)._u32 & mRegisterCache.GetKnownValue(rt, 0)._u32 );
}
else if ((rs == N64Reg_R0) | (rt == N64Reg_R0))
{
SetRegister64( rd, 0, 0 );
}
else
{
// XXXX or into dest register
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
AND( reg_lo_d, reg_lo_a, reg_lo_b );
StoreRegisterLo( rd, reg_lo_d );
if(!HiIsDone)
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
AND( reg_hi_d, reg_hi_a, reg_hi_b );
StoreRegisterHi( rd, reg_hi_d );
}
}
}
//
void CCodeGeneratorPSP::GenerateOR( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rs ]._u64 | gGPR[ op_code.rt ]._u64;
bool HiIsDone = false;
if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1))
{
SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 | mRegisterCache.GetKnownValue(rt, 1)._u32 );
HiIsDone = true;
}
else if ((mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._s32 == -1)) |
(mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._s32 == -1)) )
{
SetRegister(rd, 1, ~0 );
HiIsDone = true;
}
if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister(rd, 0, mRegisterCache.GetKnownValue(rs, 0)._u32 | mRegisterCache.GetKnownValue(rt, 0)._u32);
return;
}
if( rs == N64Reg_R0 )
{
// This doesn't seem to happen
/*if (mRegisterCache.IsKnownValue(rt, 1))
{
SetRegister(rd, 1, mRegisterCache.GetKnownValue(rt, 1)._u32 );
HiIsDone = true;
}
*/
if(mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister64(rd,
mRegisterCache.GetKnownValue(rt, 0)._u32, mRegisterCache.GetKnownValue(rt, 1)._u32);
return;
}
// This case rarely seems to happen...
// As RS is zero, the OR is just a copy of RT to RD.
// Try to avoid loading into a temp register if the dest is cached
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
LoadRegisterLo( reg_lo_d, rt );
StoreRegisterLo( rd, reg_lo_d );
if(!HiIsDone)
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rt );
StoreRegisterHi( rd, reg_hi_d );
}
}
else if( rt == N64Reg_R0 )
{
if (mRegisterCache.IsKnownValue(rs, 1))
{
SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 );
HiIsDone = true;
}
if(mRegisterCache.IsKnownValue(rs, 0))
{
SetRegister64(rd, mRegisterCache.GetKnownValue(rs, 0)._u32,
mRegisterCache.GetKnownValue(rs, 1)._u32);
return;
}
// As RT is zero, the OR is just a copy of RS to RD.
// Try to avoid loading into a temp register if the dest is cached
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
LoadRegisterLo( reg_lo_d, rs );
StoreRegisterLo( rd, reg_lo_d );
if(!HiIsDone)
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rs );
StoreRegisterHi( rd, reg_hi_d );
}
}
else
{
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
OR( reg_lo_d, reg_lo_a, reg_lo_b );
StoreRegisterLo( rd, reg_lo_d );
if(!HiIsDone)
{
if( mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._u32 == 0) )
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rt );
StoreRegisterHi( rd, reg_hi_d );
}
else if( mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._u32 == 0) )
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rs );
StoreRegisterHi( rd, reg_hi_d );
}
else
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
OR( reg_hi_d, reg_hi_a, reg_hi_b );
StoreRegisterHi( rd, reg_hi_d );
}
}
}
}
//
inline void CCodeGeneratorPSP::GenerateXOR( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._u64 = gGPR[ op_code.rs ]._u64 ^ gGPR[ op_code.rt ]._u64;
bool HiIsDone = false;
if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1))
{
SetRegister(rd, 1, mRegisterCache.GetKnownValue(rs, 1)._u32 ^ mRegisterCache.GetKnownValue(rt, 1)._u32 );
HiIsDone = true;
}
else if ((mRegisterCache.IsKnownValue(rs, 1) & (mRegisterCache.GetKnownValue(rs, 1)._u32 == 0)) )
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rt );
StoreRegisterHi( rd, reg_hi_d );
HiIsDone = true;
}
else if ((mRegisterCache.IsKnownValue(rt, 1) & (mRegisterCache.GetKnownValue(rt, 1)._u32 == 0)) )
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
LoadRegisterHi( reg_hi_d, rs );
StoreRegisterHi( rd, reg_hi_d );
HiIsDone = true;
}
if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister(rd, 0, mRegisterCache.GetKnownValue(rs, 0)._u32 ^ mRegisterCache.GetKnownValue(rt, 0)._u32);
return;
}
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
XOR( reg_lo_d, reg_lo_a, reg_lo_b );
StoreRegisterLo( rd, reg_lo_d );
if(!HiIsDone)
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
XOR( reg_hi_d, reg_hi_a, reg_hi_b );
StoreRegisterHi( rd, reg_hi_d );
}
}
//
inline void CCodeGeneratorPSP::GenerateNOR( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._u64 = ~(gGPR[ op_code.rs ]._u64 | gGPR[ op_code.rt ]._u64);
bool HiIsDone = false;
if (mRegisterCache.IsKnownValue(rs, 1) & mRegisterCache.IsKnownValue(rt, 1))
{
SetRegister(rd, 1, ~(mRegisterCache.GetKnownValue(rs, 1)._u32 | mRegisterCache.GetKnownValue(rt, 1)._u32) );
HiIsDone = true;
}
if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister(rd, 0, ~(mRegisterCache.GetKnownValue(rs, 0)._u32 | mRegisterCache.GetKnownValue(rt, 0)._u32) );
return;
}
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
NOR( reg_lo_d, reg_lo_a, reg_lo_b );
StoreRegisterLo( rd, reg_lo_d );
if(!HiIsDone)
{
EPspReg reg_hi_d( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
NOR( reg_hi_d, reg_hi_a, reg_hi_b );
StoreRegisterHi( rd, reg_hi_d );
}
}
//
inline void CCodeGeneratorPSP::GenerateSLT( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
#ifdef ENABLE_64BIT
// Because we have a branch here, we need to make sure that we have a consistent view
// of the registers regardless of whether we take it or not. We pull in the lo halves of
// the registers here so that they're Valid regardless of whether we take the branch or not
PrepareCachedRegisterLo( rs );
PrepareCachedRegisterLo( rt );
// If possible, we write directly into the destination register. We have to be careful though -
// if the destination register is the same as either of the source registers we have to use
// a temporary instead, to avoid overwriting the contents.
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
if((rd == rs) | (rd == rt))
{
reg_lo_d = PspReg_V0;
}
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
CJumpLocation branch( BNE( reg_hi_a, reg_hi_b, CCodeLabel(nullptr), false ) );
SLT( reg_lo_d, reg_hi_a, reg_hi_b ); // In branch delay slot
// If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SLT( reg_lo_d, reg_lo_a, reg_lo_b );
// Patch up the branch
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
#else
/*
if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (mRegisterCache.GetKnownValue(rs, 0)._s32 < mRegisterCache.GetKnownValue(rt, 0)._s32) );
return;
}
*/
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SLT( reg_lo_d, reg_lo_a, reg_lo_b );
#endif
UpdateRegister( rd, reg_lo_d, URO_HI_CLEAR );
}
//
inline void CCodeGeneratorPSP::GenerateSLTU( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
#ifdef ENABLE_64BIT
// Because we have a branch here, we need to make sure that we have a consistent view
// of the registers regardless of whether we take it or not. We pull in the lo halves of
// the registers here so that they're Valid regardless of whether we take the branch or not
PrepareCachedRegisterLo( rs );
PrepareCachedRegisterLo( rt );
// If possible, we write directly into the destination register. We have to be careful though -
// if the destination register is the same as either of the source registers we have to use
// a temporary instead, to avoid overwriting the contents.
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
if((rd == rs) | (rd == rt))
{
reg_lo_d = PspReg_V0;
}
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
EPspReg reg_hi_b( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
CJumpLocation branch( BNE( reg_hi_a, reg_hi_b, CCodeLabel(nullptr), false ) );
SLTU( reg_lo_d, reg_hi_a, reg_hi_b ); // In branch delay slot
// If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SLTU( reg_lo_d, reg_lo_a, reg_lo_b );
// Patch up the branch
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
#else
/*
if (mRegisterCache.IsKnownValue(rs, 0) & mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (mRegisterCache.GetKnownValue(rs, 0)._u32 < mRegisterCache.GetKnownValue(rt, 0)._u32) );
return;
}
*/
EPspReg reg_lo_d( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_lo_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SLTU( reg_lo_d, reg_lo_a, reg_lo_b );
#endif
UpdateRegister( rd, reg_lo_d, URO_HI_CLEAR );
}
//
inline void CCodeGeneratorPSP::GenerateADDIU( EN64Reg rt, EN64Reg rs, s16 immediate )
{
//gGPR[op_code.rt]._s64 = (s64)(s32)(gGPR[op_code.rs]._s32_0 + (s32)(s16)op_code.immediate);
if( rs == N64Reg_R0 )
{
SetRegister32s( rt, immediate );
}
else if(mRegisterCache.IsKnownValue( rs, 0 ))
{
s32 known_value( mRegisterCache.GetKnownValue( rs, 0 )._s32 + (s32)immediate );
SetRegister32s( rt, known_value );
}
else
{
EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
ADDIU( dst_reg, src_reg, immediate );
UpdateRegister( rt, dst_reg, URO_HI_SIGN_EXTEND );
}
}
//
inline void CCodeGeneratorPSP::GenerateANDI( EN64Reg rt, EN64Reg rs, u16 immediate )
{
//gGPR[op_code.rt]._u64 = gGPR[op_code.rs]._u64 & (u64)(u16)op_code.immediate;
if(mRegisterCache.IsKnownValue( rs, 0 ))
{
s32 known_value_lo( mRegisterCache.GetKnownValue( rs, 0 )._u32 & (u32)immediate );
s32 known_value_hi( 0 );
SetRegister64( rt, known_value_lo, known_value_hi );
}
else
{
EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
ANDI( dst_reg, src_reg, immediate );
UpdateRegister( rt, dst_reg, URO_HI_CLEAR );
}
}
//
inline void CCodeGeneratorPSP::GenerateORI( EN64Reg rt, EN64Reg rs, u16 immediate )
{
//gGPR[op_code.rt]._u64 = gGPR[op_code.rs]._u64 | (u64)(u16)op_code.immediate;
if( rs == N64Reg_R0 )
{
// If we're ORing against 0, then we're just setting a constant value
SetRegister64( rt, immediate, 0 );
}
else if(mRegisterCache.IsKnownValue( rs, 0 ))
{
s32 known_value_lo( mRegisterCache.GetKnownValue( rs, 0 )._u32 | (u32)immediate );
s32 known_value_hi( mRegisterCache.GetKnownValue( rs, 1 )._u32 );
SetRegister64( rt, known_value_lo, known_value_hi );
}
else
{
EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
ORI( dst_reg, src_reg, immediate );
StoreRegisterLo( rt, dst_reg );
#ifdef ENABLE_64BIT //This is out of spec but seems we can always ignore copying the hi part
// If the source/dest regs are different we need to copy the high bits across
if(rt != rs)
{
EPspReg dst_reg_hi( GetRegisterNoLoadHi( rt, PspReg_V0 ) );
LoadRegisterHi( dst_reg_hi, rs );
StoreRegisterHi( rt, dst_reg_hi );
}
#endif
}
}
//
inline void CCodeGeneratorPSP::GenerateXORI( EN64Reg rt, EN64Reg rs, u16 immediate )
{
//gGPR[op_code.rt]._u64 = gGPR[op_code.rs]._u64 ^ (u64)(u16)op_code.immediate;
if(mRegisterCache.IsKnownValue( rs, 0 ))
{
s32 known_value_lo( mRegisterCache.GetKnownValue( rs, 0 )._u32 ^ (u32)immediate );
s32 known_value_hi( mRegisterCache.GetKnownValue( rs, 1 )._u32 );
SetRegister64( rt, known_value_lo, known_value_hi );
}
else
{
EPspReg dst_reg( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg src_reg( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
XORI( dst_reg, src_reg, immediate );
StoreRegisterLo( rt, dst_reg );
#ifdef ENABLE_64BIT //This is out of spec but seems we can always ignore copying the hi part
// If the source/dest regs are different we need to copy the high bits across
// (if they are the same, we're xoring 0 to the top half which is essentially a NOP)
if(rt != rs)
{
EPspReg dst_reg_hi( GetRegisterNoLoadHi( rt, PspReg_V0 ) );
LoadRegisterHi( dst_reg_hi, rs );
StoreRegisterHi( rt, dst_reg_hi );
}
#endif
}
}
//
inline void CCodeGeneratorPSP::GenerateLUI( EN64Reg rt, s16 immediate )
{
//gGPR[op_code.rt]._s64 = (s64)(s32)((s32)(s16)op_code.immediate<<16);
SetRegister32s( rt, s32( immediate ) << 16 );
}
//
inline void CCodeGeneratorPSP::GenerateSLTI( EN64Reg rt, EN64Reg rs, s16 immediate )
{
#ifdef ENABLE_64BIT
// Because we have a branch here, we need to make sure that we have a consistent view
// of the register regardless of whether we take it or not. We pull in the lo halves of
// the register here so that it's Valid regardless of whether we take the branch or not
PrepareCachedRegisterLo( rs );
// If possible, we write directly into the destination register. We have to be careful though -
// if the destination register is the same as either of the source registers we have to use
// a temporary instead, to avoid overwriting the contents.
EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
if(rt == rs)
{
reg_lo_d = PspReg_V0;
}
CJumpLocation branch;
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
if( immediate >= 0 )
{
// Positive data - we can avoid a constant load here
branch = BNE( reg_hi_a, PspReg_R0, CCodeLabel(nullptr), false );
SLTI( reg_lo_d, reg_hi_a, 0x0000 ); // In branch delay slot
}
else
{
// Negative data
LoadConstant( PspReg_A0, -1 );
branch = BNE( reg_hi_a, PspReg_A0, CCodeLabel(nullptr), false );
SLTI( reg_lo_d, reg_hi_a, 0xffff ); // In branch delay slot
}
// If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
//Potential bug!!!
//If using 64bit mode this might need to be SLTIU since sign is already checked in hi word //Strmn
SLTI( reg_lo_d, reg_lo_a, immediate );
// Patch up the branch
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
#else
/*
if (mRegisterCache.IsKnownValue(rs, 0))
{
SetRegister32s( rt, (mRegisterCache.GetKnownValue(rs, 0)._s32 < (s32)immediate) );
return;
}
*/
EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
SLTI( reg_lo_d, reg_lo_a, immediate );
#endif
UpdateRegister( rt, reg_lo_d, URO_HI_CLEAR );
}
//
inline void CCodeGeneratorPSP::GenerateSLTIU( EN64Reg rt, EN64Reg rs, s16 immediate )
{
#ifdef ENABLE_64BIT
// Because we have a branch here, we need to make sure that we have a consistent view
// of the register regardless of whether we take it or not. We pull in the lo halves of
// the register here so that it's Valid regardless of whether we take the branch or not
PrepareCachedRegisterLo( rs );
// If possible, we write directly into the destination register. We have to be careful though -
// if the destination register is the same as either of the source registers we have to use
// a temporary instead, to avoid overwriting the contents.
EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
if(rt == rs)
{
reg_lo_d = PspReg_V0;
}
CJumpLocation branch;
EPspReg reg_hi_a( GetRegisterAndLoadHi( rs, PspReg_V0 ) );
if( immediate >= 0 )
{
// Positive data - we can avoid a constant load here
branch = BNE( reg_hi_a, PspReg_R0, CCodeLabel(nullptr), false );
SLTIU( reg_lo_d, reg_hi_a, 0x0000 ); // In branch delay slot
}
else
{
// Negative data
LoadConstant( PspReg_A0, -1 );
branch = BNE( reg_hi_a, PspReg_A0, CCodeLabel(nullptr), false );
SLTIU( reg_lo_d, reg_hi_a, 0xffff ); // In branch delay slot
}
// If the branch was not taken, it means that the high part of the registers was equal, so compare bottom half
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
SLTIU( reg_lo_d, reg_lo_a, immediate );
// Patch up the branch
PatchJumpLong( branch, GetAssemblyBuffer()->GetLabel() );
#else
/*
if (mRegisterCache.IsKnownValue(rs, 0))
{
SetRegister32s( rt, (mRegisterCache.GetKnownValue(rs, 0)._u32 < (u32)immediate) );
return;
}
*/
EPspReg reg_lo_d( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EPspReg reg_lo_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
SLTIU( reg_lo_d, reg_lo_a, immediate );
#endif
UpdateRegister( rt, reg_lo_d, URO_HI_CLEAR );
}
//
inline void CCodeGeneratorPSP::GenerateSLL( EN64Reg rd, EN64Reg rt, u32 sa )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( (gGPR[ op_code.rt ]._u32_0 << op_code.sa) & 0xFFFFFFFF );
if (mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 << sa));
return;
}
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SLL( reg_lo_rd, reg_lo_rt, sa );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//Doubleword Shift Left Logical (Doom64)
inline void CCodeGeneratorPSP::GenerateDSLL( EN64Reg rd, EN64Reg rt, u32 sa )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( (gGPR[ op_code.rt ]._u32_0 << op_code.sa) & 0xFFFFFFFF );
//if (mRegisterCache.IsKnownValue(rt, 0))
//{
// SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 << sa));
// return;
//}
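// 64-bit shift composed from 32-bit ops:
//   hi = (hi << sa) | (lo >> (32-sa)) when sa != 0, lo = lo << sa
// (sa is 0..31 here; DSLL32 handles shifts of 32 or more)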
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_hi_rd( GetRegisterNoLoadHi( rd, PspReg_A0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
EPspReg reg_hi_rt( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
SLL( reg_hi_rd, reg_hi_rt, sa );
if( sa != 0 )
{
SRL( PspReg_A2, reg_lo_rt, 32-sa);
OR( reg_hi_rd, reg_hi_rd, PspReg_A2);
}
SLL( reg_lo_rd, reg_lo_rt, sa );
StoreRegisterLo( rd, reg_lo_rd);
StoreRegisterHi( rd, reg_hi_rd);
}
//
inline void CCodeGeneratorPSP::GenerateDSLL32( EN64Reg rd, EN64Reg rt, u32 sa )
{
EPspReg reg_hi_rd( GetRegisterNoLoadHi( rd, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SLL( reg_hi_rd, reg_lo_rt, sa );
SetRegister( rd, 0, 0 ); //Zero lo part
StoreRegisterHi( rd, reg_hi_rd ); //Store result
}
//
inline void CCodeGeneratorPSP::GenerateSRL( EN64Reg rd, EN64Reg rt, u32 sa )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._u32_0 >> op_code.sa );
if (mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 >> sa));
return;
}
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SRL( reg_lo_rd, reg_lo_rt, sa );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//Doubleword Shift Right Arithmetic (Doom64)
inline void CCodeGeneratorPSP::GenerateDSRA( EN64Reg rd, EN64Reg rt, u32 sa )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._s32_0 >> op_code.sa );
//if (mRegisterCache.IsKnownValue(rt, 0))
//{
// SetRegister32s(rd, mRegisterCache.GetKnownValue(rt, 0)._s32 >> sa);
// return;
//}
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_hi_rd( GetRegisterNoLoadHi( rd, PspReg_A0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
EPspReg reg_hi_rt( GetRegisterAndLoadHi( rt, PspReg_A0 ) );
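// 64-bit arithmetic shift composed from 32-bit ops:
//   lo = (lo >> sa) | (hi << (32-sa)) when sa != 0, hi = hi >> sa (arithmetic)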
SRL( reg_lo_rd, reg_lo_rt, sa );
if( sa != 0 )
{
SLL( PspReg_A2, reg_hi_rt, 32-sa );
OR( reg_lo_rd, reg_lo_rd, PspReg_A2);
}
SRA( reg_hi_rd, reg_hi_rt, sa );
StoreRegisterLo( rd, reg_lo_rd);
StoreRegisterHi( rd, reg_hi_rd);
}
//
inline void CCodeGeneratorPSP::GenerateDSRA32( EN64Reg rd, EN64Reg rt, u32 sa )
{
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_hi_rt( GetRegisterAndLoadHi( rt, PspReg_V0 ) );
SRA( reg_lo_rd, reg_hi_rt, sa );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateSRA( EN64Reg rd, EN64Reg rt, u32 sa )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._s32_0 >> op_code.sa );
if (mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, mRegisterCache.GetKnownValue(rt, 0)._s32 >> sa);
return;
}
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SRA( reg_lo_rd, reg_lo_rt, sa );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateSLLV( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( (gGPR[ op_code.rt ]._u32_0 << ( gGPR[ op_code.rs ]._u32_0 & 0x1F ) ) & 0xFFFFFFFF );
if (mRegisterCache.IsKnownValue(rs, 0)
& mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 <<
(mRegisterCache.GetKnownValue(rs, 0)._u32 & 0x1F)));
return;
}
// PSP sllv does exactly the same op as the N64's - no need for masking
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SLLV( reg_lo_rd, reg_lo_rs, reg_lo_rt );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateSRLV( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._u32_0 >> ( gGPR[ op_code.rs ]._u32_0 & 0x1F ) );
if (mRegisterCache.IsKnownValue(rs, 0)
& mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._u32 >>
(mRegisterCache.GetKnownValue(rs, 0)._u32 & 0x1F)));
return;
}
// PSP srlv does exactly the same op as the N64's - no need for masking
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SRLV( reg_lo_rd, reg_lo_rs, reg_lo_rt );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateSRAV( EN64Reg rd, EN64Reg rs, EN64Reg rt )
{
//gGPR[ op_code.rd ]._s64 = (s64)(s32)( gGPR[ op_code.rt ]._s32_0 >> ( gGPR[ op_code.rs ]._u32_0 & 0x1F ) );
if (mRegisterCache.IsKnownValue(rs, 0)
& mRegisterCache.IsKnownValue(rt, 0))
{
SetRegister32s(rd, (s32)(mRegisterCache.GetKnownValue(rt, 0)._s32 >>
(mRegisterCache.GetKnownValue(rs, 0)._u32 & 0x1F)));
return;
}
// PSP srav does exactly the same op as the N64's - no need for masking
EPspReg reg_lo_rd( GetRegisterNoLoadLo( rd, PspReg_V0 ) );
EPspReg reg_lo_rs( GetRegisterAndLoadLo( rs, PspReg_A0 ) );
EPspReg reg_lo_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SRAV( reg_lo_rd, reg_lo_rs, reg_lo_rt );
UpdateRegister( rd, reg_lo_rd, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateLB( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
GenerateLoad( address, reg_dst, base, offset, OP_LB, 3, set_branch_delay ? ReadBitsDirectBD_s8 : ReadBitsDirect_s8 ); // NB this fills the whole of reg_dst
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateLBU( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
GenerateLoad( address, reg_dst, base, offset, OP_LBU, 3, set_branch_delay ? ReadBitsDirectBD_u8 : ReadBitsDirect_u8 ); // NB this fills the whole of reg_dst
UpdateRegister( rt, reg_dst, URO_HI_CLEAR );
}
//
inline void CCodeGeneratorPSP::GenerateLH( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
GenerateLoad( address, reg_dst, base, offset, OP_LH, 2, set_branch_delay ? ReadBitsDirectBD_s16 : ReadBitsDirect_s16 ); // NB this fills the whole of reg_dst
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateLHU( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
GenerateLoad( address, reg_dst, base, offset, OP_LHU, 2, set_branch_delay ? ReadBitsDirectBD_u16 : ReadBitsDirect_u16 ); // NB this fills the whole of reg_dst
UpdateRegister( rt, reg_dst, URO_HI_CLEAR );
}
//
inline void CCodeGeneratorPSP::GenerateLW( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset )
{
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
// This is for San Francisco Rush 2049, otherwise it crashes when the race is about to start.
if(rt == N64Reg_R0)
{
#ifdef DAEDALUS_DEBUG_CONSOLE
DAEDALUS_ERROR("Attempted write to r0!");
#endif
return;
}
GenerateLoad( address, reg_dst, base, offset, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateLD( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset )
{
EPspReg reg_dst_hi( GetRegisterNoLoadHi( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
GenerateLoad( address, reg_dst_hi, base, offset, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
StoreRegisterHi( rt, reg_dst_hi );
//Adding 4 to offset should be quite safe //Corn
EPspReg reg_dst_lo( GetRegisterNoLoadLo( rt, PspReg_V0 ) ); // Use V0 to avoid copying return value if reg is not cached
GenerateLoad( address, reg_dst_lo, base, offset + 4, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
StoreRegisterLo( rt, reg_dst_lo );
}
//
inline void CCodeGeneratorPSP::GenerateLWC1( u32 address, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset )
{
EN64FloatReg n64_ft = EN64FloatReg( ft );
EPspFloatReg psp_ft = EPspFloatReg( n64_ft );// 1:1 Mapping
//u32 address = (u32)( gGPR[op_code.base]._s32_0 + (s32)(s16)op_code.immediate );
//value = Read32Bits(address);
//gCPUState.FPU[op_code.ft]._s32_0 = value;
#ifdef ENABLE_LWC1
	//Since LW and LWC1 have the same format we do a small hack that lets us load directly
	//into the float reg without going via the CPU first, which saves us the MTC1 instruction //Corn
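	//(presumably GenerateLoad keys the emitted opcode off OP_LWC1, so the EPspReg
	// cast only needs to forward the register index)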
GenerateLoad( address, (EPspReg)psp_ft, base, offset, OP_LWC1, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
UpdateFloatRegister( n64_ft );
#else
// TODO: Actually perform LWC1 here
EPspReg reg_dst( PspReg_V0 ); // GenerateLoad is slightly more efficient when using V0
GenerateLoad( address, reg_dst, base, offset, OP_LW, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
//SetVar( &gCPUState.FPU[ ft ]._u32, reg_dst );
MTC1( psp_ft, reg_dst );
UpdateFloatRegister( n64_ft );
#endif
}
//
#ifdef ENABLE_LDC1
inline void CCodeGeneratorPSP::GenerateLDC1( u32 address, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset )
{
EPspReg reg_base( GetRegisterAndLoadLo( base, PspReg_A0 ) );
ADDU( PspReg_A0, reg_base, gMemoryBaseReg );
EN64FloatReg n64_ft = EN64FloatReg( ft + 1 );
EPspFloatReg psp_ft = EPspFloatReg( n64_ft );// 1:1 Mapping
//GenerateLoad( address, (EPspReg)psp_ft, base, offset, OP_LWC1, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
LWC1( psp_ft, PspReg_A0, offset);
mRegisterCache.MarkFPAsValid( n64_ft, true );
mRegisterCache.MarkFPAsDirty( n64_ft, true );
n64_ft = EN64FloatReg( ft );
psp_ft = EPspFloatReg( n64_ft );// 1:1 Mapping
//GenerateLoad( address, (EPspReg)psp_ft, base, offset + 4, OP_LWC1, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
LWC1( psp_ft, PspReg_A0, offset+4);
mRegisterCache.MarkFPAsValid( n64_ft, true );
mRegisterCache.MarkFPAsDirty( n64_ft, true );
mRegisterCache.MarkFPAsSim( n64_ft, false );
}
#endif
#ifdef ENABLE_LWR_LWL
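// Both helpers below turn the unaligned access into an aligned word load plus
// mask/shift fixups; GenerateLoad hands back the byte misalignment (address & 3)
// in PspReg_A3. Worked example for LWL with (address & 3) == 1, ie shift == 8:
//   reg = (reg & 0x000000FF) | (mem << 8)   - top three bytes come from memory
// and for LWR with (address & 3) == 0, ie shift == 0:
//   reg = (reg & 0xFFFFFF00) | (mem >> 24)  - only the low byte comes from memory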
//Unaligned load Left //Corn
inline void CCodeGeneratorPSP::GenerateLWL( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset )
{
//Will return the value in PspReg_V0 and the shift in PspReg_A3
GenerateLoad( address, PspReg_V0, base, offset, OP_LWL, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
EPspReg reg_dst( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SLL( PspReg_A3, PspReg_A3, 0x3 ); // shift *= 8
	NOR( PspReg_A2, PspReg_R0, PspReg_R0 );	// Load 0xFFFFFFFF
	SLLV( PspReg_A2, PspReg_A3, PspReg_A2 );	// mask <<= shift
	NOR( PspReg_A2, PspReg_A2, PspReg_R0 );	// mask = ~mask
	SLLV( PspReg_A3, PspReg_A3, PspReg_V0 );	// memory <<= shift
	AND( reg_dst, reg_dst, PspReg_A2 );	// reg_dst &= mask
	OR( reg_dst, reg_dst, PspReg_A3 );	// reg_dst |= memory
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
//Unaligned load Right //Corn
inline void CCodeGeneratorPSP::GenerateLWR( u32 address, bool set_branch_delay, EN64Reg rt, EN64Reg base, s16 offset )
{
//Will return the value in PspReg_V0 and the shift in PspReg_A3
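	//NB: OP_LWL here appears intentional - LWR reuses the same aligned-load helper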
GenerateLoad( address, PspReg_V0, base, offset, OP_LWL, 0, set_branch_delay ? ReadBitsDirectBD_u32 : ReadBitsDirect_u32 );
EPspReg reg_dst( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
SLL( PspReg_A3, PspReg_A3, 0x3 ); // shift *= 8
	ADDIU( PspReg_A2, PspReg_R0, 0xFF00 );	// Load 0xFFFFFF00 (the immediate is sign extended)
	SLLV( PspReg_A2, PspReg_A3, PspReg_A2 );	// mask <<= shift
	AND( reg_dst, reg_dst, PspReg_A2 );	// reg_dst &= mask
	XORI( PspReg_A3, PspReg_A3, 0x18 );	// shift ^= 0x18 (ie. shift = 24 - shift)
	SRLV( PspReg_A3, PspReg_A3, PspReg_V0 );	// memory >>= shift
	OR( reg_dst, reg_dst, PspReg_A3 );	// reg_dst |= memory
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
#endif
//
inline void CCodeGeneratorPSP::GenerateSB( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value, base, offset, OP_SB, 3, set_branch_delay ? WriteBitsDirectBD_u8 : WriteBitsDirect_u8 );
}
//
inline void CCodeGeneratorPSP::GenerateSH( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value, base, offset, OP_SH, 2, set_branch_delay ? WriteBitsDirectBD_u16 : WriteBitsDirect_u16 );
}
//
inline void CCodeGeneratorPSP::GenerateSW( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value, base, offset, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
}
//
inline void CCodeGeneratorPSP::GenerateSD( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_value_hi( GetRegisterAndLoadHi( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value_hi, base, offset, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
//Adding 4 to offset should be quite safe //Corn
EPspReg reg_value_lo( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value_lo, base, offset + 4, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
}
//
inline void CCodeGeneratorPSP::GenerateSWC1( u32 current_pc, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset )
{
EN64FloatReg n64_ft = EN64FloatReg( ft );
EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );
#ifdef ENABLE_SWC1
	//Since SW and SWC1 have the same format we do a small hack that lets us store directly
	//from the float reg without going via the CPU first, which saves us the MFC1 instruction //Corn
GenerateStore( current_pc, (EPspReg)psp_ft, base, offset, OP_SWC1, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
#else
MFC1( PspReg_A1, psp_ft );
GenerateStore( current_pc, PspReg_A1, base, offset, OP_SW, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
#endif
}
//
#ifdef ENABLE_SDC1
inline void CCodeGeneratorPSP::GenerateSDC1( u32 current_pc, bool set_branch_delay, u32 ft, EN64Reg base, s32 offset )
{
EN64FloatReg n64_ft_sig = EN64FloatReg( ft );
EPspFloatReg psp_ft_sig = GetFloatRegisterAndLoad( n64_ft_sig );
	EN64FloatReg n64_ft = EN64FloatReg( ft + 1 );
	EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );	//NB: n64_ft is already ft+1, don't offset it a second time
//GenerateStore( current_pc, (EPspReg)psp_ft, base, offset, OP_SWC1, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
//GenerateStore( current_pc, (EPspReg)psp_ft_sig, base, offset + 4, OP_SWC1, 0, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
EPspReg reg_base( GetRegisterAndLoadLo( base, PspReg_A0 ) );
ADDU( PspReg_A0, reg_base, gMemoryBaseReg );
SWC1( psp_ft, PspReg_A0, offset);
SWC1( psp_ft_sig, PspReg_A0, offset+4);
}
#endif
//
#ifdef ENABLE_SWR_SWL
inline void CCodeGeneratorPSP::GenerateSWL( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value, base, offset, OP_SWL, 3, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
}
//
inline void CCodeGeneratorPSP::GenerateSWR( u32 current_pc, bool set_branch_delay, EN64Reg rt, EN64Reg base, s32 offset )
{
EPspReg reg_value( GetRegisterAndLoadLo( rt, PspReg_A1 ) );
GenerateStore( current_pc, reg_value, base, offset, OP_SWR, 3, set_branch_delay ? WriteBitsDirectBD_u32 : WriteBitsDirect_u32 );
}
#endif
//
inline void CCodeGeneratorPSP::GenerateMFC1( EN64Reg rt, u32 fs )
{
//gGPR[ op_code.rt ]._s64 = (s64)(s32)gCPUState.FPU[fs]._s32_0
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
EN64FloatReg n64_fs = EN64FloatReg( fs );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
MFC1( reg_dst, psp_fs );
	//Needs to be sign extended (or it breaks DKR)
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateMTC1( u32 fs, EN64Reg rt )
{
//gCPUState.FPU[fs]._s32_0 = gGPR[ op_code.rt ]._s32_0;
EPspReg psp_rt( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
EN64FloatReg n64_fs = EN64FloatReg( fs );
EPspFloatReg psp_fs = EPspFloatReg( n64_fs );//1:1 Mapping
MTC1( psp_fs, psp_rt );
UpdateFloatRegister( n64_fs );
}
//
inline void CCodeGeneratorPSP::GenerateCFC1( EN64Reg rt, u32 fs )
{
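	// Only FCR0 (implementation/revision) and FCR31 (control/status) exist on the
	// R4300 - reads of any other index are silently skipped here.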
	if( (fs == 0) || (fs == 31) )
{
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
GetVar( reg_dst, &gCPUState.FPUControl[ fs ]._u32 );
#ifdef ENABLE_64BIT
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
#else
StoreRegisterLo( rt, reg_dst );
#endif
}
}
//
// This breaks Bomberman Hero, not sure how to fix it though - maybe because we don't set the rounding mode?
// http://forums.daedalusx64.com/viewtopic.php?f=77&t=2258&p=36374&hilit=GenerateCTC1#p36374
//
void CCodeGeneratorPSP::GenerateCTC1( u32 fs, EN64Reg rt )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( fs == 31, "CTC1 register is invalid");
#endif
EPspReg psp_rt_lo( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SetVar( &gCPUState.FPUControl[ fs ]._u32, psp_rt_lo );
// Only the lo part is ever set - Salvy
//EPspReg psp_rt_hi( GetRegisterAndLoadHi( rt, PspReg_V0 ) );
//SetVar( &gCPUState.FPUControl[ fs ]._u32_1, psp_rt_hi );
//XXXX TODO:
// Change the rounding mode
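	// A possible sketch (untested, and the CTC1 emitter is an assumption - only CFC1
	// and INS are known to exist with the argument order used elsewhere in this file):
	//   CFC1( PspReg_A0, (EPspFloatReg)31 );	// read the PSP's own FCR31
	//   INS( PspReg_A0, psp_rt_lo, 1, 0 );		// copy the N64 RM field (bits 0-1) across
	//   CTC1( (EPspFloatReg)31, PspReg_A0 );	// write it back
	// Both FPUs use the standard MIPS two-bit rounding mode encoding.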
}
//
inline void CCodeGeneratorPSP::GenerateBEQ( EN64Reg rs, EN64Reg rt, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BEQ?" );
#endif
	// One or other of these may be r0 - we don't really care for optimisation
	// purposes though, as ultimately the register loads are factored out
EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
// XXXX This may actually need to be a 64 bit compare, but this is what R4300.cpp does
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BNE( reg_a, reg_b, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BEQ( reg_a, reg_b, CCodeLabel(nullptr), true );
}
}
//
inline void CCodeGeneratorPSP::GenerateBNE( EN64Reg rs, EN64Reg rt, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BNE?" );
#endif
	// One or other of these may be r0 - we don't really care for optimisation
	// purposes though, as ultimately the register loads are factored out
EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
EPspReg reg_b( GetRegisterAndLoadLo( rt, PspReg_A0 ) );
// XXXX This may actually need to be a 64 bit compare, but this is what R4300.cpp does
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BEQ( reg_a, reg_b, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BNE( reg_a, reg_b, CCodeLabel(nullptr), true );
}
}
//
inline void CCodeGeneratorPSP::GenerateBLEZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BLEZ?" );
#endif
EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
// XXXX This may actually need to be a 64 bit compare, but this is what R4300.cpp does
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BGTZ( reg_a, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BLEZ( reg_a, CCodeLabel(nullptr), true );
}
}
//
inline void CCodeGeneratorPSP::GenerateBGTZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BGTZ?" );
#endif
EPspReg reg_lo( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
	//64bit compare is needed for DK64 or DK can walk through walls
if(g_ROM.GameHacks == DK64)
{
#if 0
//Do a full 64 bit compare //Corn
SRL( PspReg_V0, reg_lo, 1); //Free MSB sign bit
ANDI( PspReg_V1, reg_lo, 1); //Extract LSB bit
OR( PspReg_V0, PspReg_V0, PspReg_V1); //Merge with OR
EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V1 ) );
OR( PspReg_V1, PspReg_V0, reg_hi); //Merge lo with hi part of register with sign bit
#else
//This takes some shortcuts //Corn
EPspReg reg_hi( GetRegisterAndLoadHi( rs, PspReg_V1 ) );
OR( PspReg_V1, reg_lo, reg_hi);
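		//NB: this shortcut misreads a positive 64bit value whose lo word has bit 31
		//set as negative; the full compare in the #if 0 block handles that case.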
#endif
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BLEZ( PspReg_V1, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BGTZ( PspReg_V1, CCodeLabel(nullptr), true );
}
}
else
{
//Do a fast 32 bit compare //Corn
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BLEZ( reg_lo, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BGTZ( reg_lo, CCodeLabel(nullptr), true );
}
}
}
//
inline void CCodeGeneratorPSP::GenerateBLTZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BLTZ?" );
#endif
EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
	// XXXX This may actually need to be a 64 bit compare???
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BGEZ( reg_a, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BLTZ( reg_a, CCodeLabel(nullptr), true );
}
}
//
inline void CCodeGeneratorPSP::GenerateBGEZ( EN64Reg rs, const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BGEZ?" );
#endif
EPspReg reg_a( GetRegisterAndLoadLo( rs, PspReg_V0 ) );
	// XXXX This may actually need to be a 64 bit compare???
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BLTZ( reg_a, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BGEZ( reg_a, CCodeLabel(nullptr), true );
}
}
//
inline void CCodeGeneratorPSP::GenerateBC1F( const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BC1F?" );
#endif
//If compare was done in current fragment then use BC1T or BC1F directly //Corn
if( mFloatCMPIsValid )
{
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BC1T( CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BC1F( CCodeLabel(nullptr), true );
}
}
else
{
GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 );
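		//FPCSR_C (the result of the last compare) is bit 23 of the emulated FCR31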
#if 1
EXT( PspReg_V0, PspReg_V0, 0, 23 ); //Extract condition bit (true/false)
#else
LoadConstant( PspReg_A0, FPCSR_C );
AND( PspReg_V0, PspReg_V0, PspReg_A0 );
#endif
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BNE( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BEQ( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true );
}
}
}
//
inline void CCodeGeneratorPSP::GenerateBC1T( const SBranchDetails * p_branch, CJumpLocation * p_branch_jump )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
DAEDALUS_ASSERT( p_branch != nullptr, "No branch details?" );
DAEDALUS_ASSERT( p_branch->Direct, "Indirect branch for BC1T?" );
#endif
//If compare was done in current fragment then use BC1T or BC1F directly //Corn
if( mFloatCMPIsValid )
{
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BC1F( CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BC1T( CCodeLabel(nullptr), true );
}
}
else
{
GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 );
#if 1
EXT( PspReg_V0, PspReg_V0, 0, 23 ); //Extract condition bit (true/false)
#else
LoadConstant( PspReg_A0, FPCSR_C );
AND( PspReg_V0, PspReg_V0, PspReg_A0 );
#endif
if( p_branch->ConditionalBranchTaken )
{
// Flip the sign of the test -
*p_branch_jump = BEQ( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true );
}
else
{
*p_branch_jump = BNE( PspReg_V0, PspReg_R0, CCodeLabel(nullptr), true );
}
}
}
//
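// The *_D_Sim family below fakes N64 doubles with single PSP floats: reg fd holds
// the SIMULATESIG marker (so IsFPSim can tell a simulated double from a real one)
// and fd+1 holds the value as a plain 32bit float. Precision drops from 53 to 24
// mantissa bits, which appears to be good enough for the games that use this path.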
inline void CCodeGeneratorPSP::GenerateADD_D_Sim( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) );
//Use float now instead of double :)
ADD_S( psp_fd, psp_fs, psp_ft );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateSUB_D_Sim( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) );
//Use float now instead of double :)
SUB_S( psp_fd, psp_fs, psp_ft );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateMUL_D_Sim( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) );
//Use float now instead of double :)
MUL_S( psp_fd, psp_fs, psp_ft );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateDIV_D_Sim( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) );
//Use float now instead of double :)
DIV_S( psp_fd, psp_fs, psp_ft );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateSQRT_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Use float now instead of double :)
SQRT_S( psp_fd, psp_fs );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateABS_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Use float now instead of double :)
ABS_S( psp_fd, psp_fs );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateMOV_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs_sig = EPspFloatReg( n64_fs );//1:1 Mapping
if( mRegisterCache.IsFPSim( n64_fs ) )
{
//Copy Sim double
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Move Sim double :)
MOV_S( psp_fd, psp_fs );
MOV_S( psp_fd_sig, psp_fs_sig); //Copy signature as well
UpdateSimDoubleRegister( n64_fd );
}
else
{
//Copy true double!
GetFloatRegisterAndLoad( n64_fs );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( EN64FloatReg(n64_fs + 1) ) );
//Move double :)
MOV_S( psp_fd_sig, psp_fs_sig); //Lo part
MOV_S( psp_fd, psp_fs ); //Hi part
UpdateFloatRegister( n64_fd );
UpdateFloatRegister( EN64FloatReg(n64_fd + 1) );
}
}
//
inline void CCodeGeneratorPSP::GenerateNEG_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Use float now instead of double :)
NEG_S( psp_fd, psp_fs );
if( !mRegisterCache.IsFPSim( n64_fd ) ) MOV_S( psp_fd_sig, EPspFloatReg( fs )); //Copy signature as well if needed
UpdateSimDoubleRegister( n64_fd );
}
//Convert Double to s32(TRUNC)
inline void CCodeGeneratorPSP::GenerateTRUNC_W_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
//EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Use float now instead of double :)
TRUNC_W_S( psp_fd_sig, psp_fs );
UpdateFloatRegister( n64_fd );
}
//Convert Double to s32
inline void CCodeGeneratorPSP::GenerateCVT_W_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
//EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Use float now instead of double :)
CVT_W_S( psp_fd_sig, psp_fs );
UpdateFloatRegister( n64_fd );
}
//Convert Double to Float
inline void CCodeGeneratorPSP::GenerateCVT_S_D_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
//EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
//Use float now instead of double :)
MOV_S( psp_fd_sig, psp_fs );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateCMP_D_Sim( u32 fs, ECop1OpFunction cmp_op, u32 ft )
{
mFloatCMPIsValid = true;
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EPspFloatReg psp_fs( GetSimFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetSimFloatRegisterAndLoad( n64_ft ) );
//Use float now instead of double :)
CMP_S( psp_fs, cmp_op, psp_ft );
#if 1 //Improved version no branch //Corn
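	//Merge the PSP FPU condition flag into the emulated FCR31 without branching:
	//CFC1 reads the real FCR31, EXT pulls out its C bit (bit 23) and INS drops it
	//into bit 23 of the emulated word.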
GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 );
CFC1( PspReg_A0, (EPspFloatReg)31 );
EXT( PspReg_A0, PspReg_A0, 0, 23 ); //Extract condition bit (true/false)
INS( PspReg_V0, PspReg_A0, 23, 23 ); //Insert condition bit (true/false)
SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 );
#else //Improved version with only one branch //Corn
GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 );
LoadConstant( PspReg_A0, FPCSR_C );
CJumpLocation test_condition( BC1T( CCodeLabel( nullptr ), false ) );
OR( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag |= c
	NOR( PspReg_A0, PspReg_A0, PspReg_R0 );	// c = !c
AND( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag &= !c
CCodeLabel condition_true( GetAssemblyBuffer()->GetLabel() );
SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 );
PatchJumpLong( test_condition, condition_true );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateADD_S( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
ADD_S( psp_fd, psp_fs, psp_ft );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateSUB_S( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg(n64_fd ); //1:1 Mapping
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
SUB_S( psp_fd, psp_fs, psp_ft );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateMUL_S( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
MUL_S( psp_fd, psp_fs, psp_ft );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateDIV_S( u32 fd, u32 fs, u32 ft )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
DIV_S( psp_fd, psp_fs, psp_ft );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateSQRT_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
SQRT_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateABS_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
ABS_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateMOV_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
MOV_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateNEG_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
NEG_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//Convert Float to s32
inline void CCodeGeneratorPSP::GenerateTRUNC_W_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
TRUNC_W_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//Convert Float to s32
inline void CCodeGeneratorPSP::GenerateFLOOR_W_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
FLOOR_W_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//Convert Float to s32
inline void CCodeGeneratorPSP::GenerateCVT_W_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
CVT_W_S( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//Convert s32 to (simulated)Double
inline void CCodeGeneratorPSP::GenerateCVT_D_W_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
CVT_S_W( psp_fd, psp_fs );
LoadConstant( PspReg_A0, SIMULATESIG ); //Get signature
MTC1( psp_fd_sig , PspReg_A0 ); //Write signature to float reg
UpdateSimDoubleRegister( n64_fd );
}
//Convert Float to (simulated)Double
inline void CCodeGeneratorPSP::GenerateCVT_D_S_Sim( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
MOV_S( psp_fd, psp_fs );
LoadConstant( PspReg_A0, SIMULATESIG ); //Get signature
MTC1( psp_fd_sig , PspReg_A0 ); //Write signature to float reg
//SetFloatVar( &gCPUState.FPU[fd + 0]._f32, psp_fd_sig );
//SetFloatVar( &gCPUState.FPU[fd + 1]._f32, psp_fd );
UpdateSimDoubleRegister( n64_fd );
}
//Convert Float to Double
inline void CCodeGeneratorPSP::GenerateCVT_D_S( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd_sig = EPspFloatReg( n64_fd );//1:1 Mapping
EPspFloatReg psp_fd = EPspFloatReg( n64_fd + 1 );//1:1 Mapping
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
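	//NB: JAL( ..., false ) leaves the delay slot open, so the MFC1 below executes
	//before _FloatToDouble runs and loads its argument into A0; the converted
	//double comes back in V0 (lo) and V1 (hi).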
JAL( CCodeLabel( reinterpret_cast< const void * >( _FloatToDouble ) ), false ); //Convert Float to Double
MFC1( PspReg_A0, psp_fs ); //Get float to convert
MTC1( psp_fd_sig , PspReg_V0 ); //Copy converted Double lo to FPU
MTC1( psp_fd , PspReg_V1 ); //Copy converted Double hi to FPU
mRegisterCache.MarkFPAsValid( n64_fd, true );
mRegisterCache.MarkFPAsDirty( n64_fd, true );
	mRegisterCache.MarkFPAsSim( n64_fd, false );	//Don't flag as simulated - it's a real Double!
mRegisterCache.MarkFPAsValid( EN64FloatReg(n64_fd + 1), true );
mRegisterCache.MarkFPAsDirty( EN64FloatReg(n64_fd + 1), true );
}
//
inline void CCodeGeneratorPSP::GenerateCMP_S( u32 fs, ECop1OpFunction cmp_op, u32 ft )
{
mFloatCMPIsValid = true;
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_ft = EN64FloatReg( ft );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
EPspFloatReg psp_ft( GetFloatRegisterAndLoad( n64_ft ) );
CMP_S( psp_fs, cmp_op, psp_ft );
#if 1 //Improved version no branch //Corn
GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 );
CFC1( PspReg_A0, (EPspFloatReg)31 );
EXT( PspReg_A0, PspReg_A0, 0, 23 ); //Extract condition bit (true/false)
INS( PspReg_V0, PspReg_A0, 23, 23 ); //Insert condition bit (true/false)
SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 );
#else //Improved version with only one branch //Corn
GetVar( PspReg_V0, &gCPUState.FPUControl[31]._u32 );
LoadConstant( PspReg_A0, FPCSR_C );
CJumpLocation test_condition( BC1T( CCodeLabel( nullptr ), false ) );
OR( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag |= c
NOR( PspReg_A0, PspReg_A0, PspReg_R0 ); // c = !c
AND( PspReg_V0, PspReg_V0, PspReg_A0 ); // flag &= !c
CCodeLabel condition_true( GetAssemblyBuffer()->GetLabel() );
SetVar( &gCPUState.FPUControl[31]._u32, PspReg_V0 );
PatchJumpLong( test_condition, condition_true );
#endif
}
//
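// The true double precision ops below are unimplemented stubs - they only raise an
// error on debug console builds. The *_D_Sim approximations above are presumably
// what runs instead when double simulation is active.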
inline void CCodeGeneratorPSP::GenerateADD_D( u32 fd, u32 fs, u32 ft )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateSUB_D( u32 fd, u32 fs, u32 ft )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateMUL_D( u32 fd, u32 fs, u32 ft )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateDIV_D( u32 fd, u32 fs, u32 ft )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateSQRT_D( u32 fd, u32 fs )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateMOV_D( u32 fd, u32 fs )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//
inline void CCodeGeneratorPSP::GenerateNEG_D( u32 fd, u32 fs )
{
#ifdef DAEDALUS_DEBUG_CONSOLE
NOT_IMPLEMENTED( __FUNCTION__ );
#endif
}
//Convert s32 to Float
inline void CCodeGeneratorPSP::GenerateCVT_S_W( u32 fd, u32 fs )
{
EN64FloatReg n64_fs = EN64FloatReg( fs );
EN64FloatReg n64_fd = EN64FloatReg( fd );
EPspFloatReg psp_fd = EPspFloatReg( n64_fd );
EPspFloatReg psp_fs( GetFloatRegisterAndLoad( n64_fs ) );
//SET_ROUND_MODE( gRoundingMode ); //XXXX Is this needed?
CVT_S_W( psp_fd, psp_fs );
UpdateFloatRegister( n64_fd );
}
//
inline void CCodeGeneratorPSP::GenerateMFC0( EN64Reg rt, u32 fs )
{
#ifdef DAEDALUS_ENABLE_ASSERTS
	// Never seen this happen, no reason to bother handling it
DAEDALUS_ASSERT( fs != C0_RAND, "Reading MFC0 random register is unhandled");
#endif
EPspReg reg_dst( GetRegisterNoLoadLo( rt, PspReg_V0 ) );
GetVar( reg_dst, &gCPUState.CPUControl[ fs ]._u32 );
UpdateRegister( rt, reg_dst, URO_HI_SIGN_EXTEND );
}
//
inline void CCodeGeneratorPSP::GenerateMTC0( EN64Reg rt, u32 fs )
{
EPspReg reg_src( GetRegisterAndLoadLo( rt, PspReg_V0 ) );
SetVar( &gCPUState.CPUControl[ fs ]._u32, reg_src );
}