mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge branch 'master' into better-controlmap
This commit is contained in:
commit
c14fcfdae1
34 changed files with 3826 additions and 52 deletions
|
@ -342,7 +342,7 @@ add_library(rg_etc1 STATIC
|
|||
include_directories(native/ext/rg_etc1)
|
||||
|
||||
add_library(cityhash STATIC
|
||||
native/ext/cityhash/city.cpp
|
||||
native/ext/cityhash/city.cpp
|
||||
native/ext/cityhash/city.h
|
||||
native/ext/cityhash/citycrc.h
|
||||
)
|
||||
|
@ -688,6 +688,12 @@ add_library(xbrz STATIC
|
|||
)
|
||||
include_directories(ext/xbrz)
|
||||
|
||||
add_library(xxhash STATIC
|
||||
ext/xxhash.c
|
||||
ext/xxhash.h
|
||||
)
|
||||
include_directories(ext/xxhash)
|
||||
|
||||
set(CoreExtra)
|
||||
set(CoreExtraLibs)
|
||||
if(ARM)
|
||||
|
@ -966,7 +972,7 @@ add_library(${CoreLibName} ${CoreLinkType}
|
|||
$<TARGET_OBJECTS:GPU>
|
||||
Globals.h
|
||||
git-version.cpp)
|
||||
target_link_libraries(${CoreLibName} Common native kirk cityhash xbrz
|
||||
target_link_libraries(${CoreLibName} Common native kirk cityhash xbrz xxhash
|
||||
${CoreExtraLibs} ${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
|
||||
setup_target_project(${CoreLibName} Core)
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ private:
|
|||
# elif defined __SSE3__
|
||||
# define _M_SSE 0x300
|
||||
# endif
|
||||
#elif (_MSC_VER >= 1500) || __INTEL_COMPILER // Visual Studio 2008
|
||||
#elif ((_MSC_VER >= 1500) || __INTEL_COMPILER) // Visual Studio 2008
|
||||
# define _M_SSE 0x402
|
||||
#endif
|
||||
|
||||
|
|
|
@ -85,7 +85,9 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
operator long() const { return (long)swap(); }
|
||||
operator long() const { return (long)swap(); }
|
||||
operator s8() const { return (s8)swap(); }
|
||||
operator u8() const { return (u8)swap(); }
|
||||
operator s16() const { return (s16)swap(); }
|
||||
operator u16() const { return (u16)swap(); }
|
||||
operator s32() const { return (s32)swap(); }
|
||||
|
|
0
Common/ppcAbi.cpp
Normal file
0
Common/ppcAbi.cpp
Normal file
508
Common/ppcEmitter.cpp
Normal file
508
Common/ppcEmitter.cpp
Normal file
|
@ -0,0 +1,508 @@
|
|||
#include <xtl.h>
|
||||
#include "ppcEmitter.h"
|
||||
|
||||
namespace PpcGen {
|
||||
|
||||
// Arithmetic ops
|
||||
// Emits ADD: template word 0x7C000214 with Rd at bit 21, Ra at bit 16 and Rb at bit 11.
void PPCXEmitter::ADD (PPCReg Rd, PPCReg Ra, PPCReg Rb) {
	Write32(0x7C000214 | (Rd << 21) | (Ra << 16) | (Rb << 11));
}
|
||||
|
||||
// Emits ADDI: template word 0x38000000 with Rd at bit 21, Ra at bit 16 and the
// low 16 bits of imm in the immediate field.
void PPCXEmitter::ADDI (PPCReg Rd, PPCReg Ra, short imm) {
	Write32(0x38000000 | (Rd << 21) | (Ra << 16) | (imm & 0xffff));
}
|
||||
|
||||
// Emits ADDIS: template word 0x3C000000 with Rd at bit 21, Ra at bit 16 and the
// low 16 bits of imm in the immediate field.
void PPCXEmitter::ADDIS (PPCReg Rd, PPCReg Ra, short imm) {
	Write32(0x3C000000 | (Rd << 21) | (Ra << 16) | (imm & 0xffff));
}
|
||||
|
||||
// Emits AND: template word 0x7C000038. The first argument (Rs) is written at
// bit 16 and the second (Ra) at bit 21, the same placement OR and XOR use for
// their first/second arguments; Rb goes at bit 11.
void PPCXEmitter::AND (PPCReg Rs, PPCReg Ra, PPCReg Rb) {
	Write32(0x7C000038 | (Ra << 21) | (Rs << 16) | (Rb << 11));
}
|
||||
|
||||
// Emits ANDI: template word 0x70000000 with Rd at bit 21, Ra at bit 16 and the
// 16-bit immediate in the low half-word.
// NOTE(review): AND/OR/XOR place their first argument at bit 16, this one places
// it at bit 21 - confirm which operand callers expect to be the destination.
void PPCXEmitter::ANDI (PPCReg Rd, PPCReg Ra, unsigned short imm) {
	Write32(0x70000000 | (Rd << 21) | (Ra << 16) | (imm & 0xffff));
}
|
||||
|
||||
// Emits ANDIS: template word 0x74000000 with Rd at bit 21, Ra at bit 16 and the
// 16-bit immediate in the low half-word.
// NOTE(review): same operand placement as ANDI (first argument at bit 21) -
// confirm which operand callers expect to be the destination.
void PPCXEmitter::ANDIS (PPCReg Rd, PPCReg Ra, unsigned short imm) {
	Write32(0x74000000 | (Rd << 21) | (Ra << 16) | (imm & 0xffff));
}
|
||||
|
||||
// Memory load/store operations
|
||||
// Emits LI: the ADDI template word 0x38000000 with the register field at bit 16
// left as zero, dest at bit 21 and imm in the low half-word.
void PPCXEmitter::LI(PPCReg dest, unsigned short imm) {
	Write32(0x38000000 | (dest << 21) | (imm & 0xffff));
}
|
||||
|
||||
// Emits LIS: the ADDIS template word 0x3C000000 with the register field at bit 16
// left as zero, dest at bit 21 and imm in the low half-word.
void PPCXEmitter::LIS(PPCReg dest, unsigned short imm) {
	Write32(0x3C000000 | (dest << 21) | (imm & 0xffff));
}
|
||||
|
||||
// Emits LBZ (8-bit load): template 0x88000000, dest at bit 21, base register src
// at bit 16, low 16 bits of offset as displacement.
void PPCXEmitter::LBZ (PPCReg dest, PPCReg src, int offset) {
	Write32(0x88000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits LBZX (indexed 8-bit load): primary opcode 31, extended opcode 87,
// dest at bit 21, a at bit 16, b at bit 11.
void PPCXEmitter::LBZX (PPCReg dest, PPCReg a, PPCReg b) {
	Write32((31<<26) | (dest << 21) | (a << 16) | (b << 11) | (87<<1));
}
|
||||
|
||||
// Emits LHZ (16-bit load): template 0xA0000000, dest at bit 21, base register src
// at bit 16, low 16 bits of offset as displacement.
void PPCXEmitter::LHZ (PPCReg dest, PPCReg src, int offset) {
	Write32(0xA0000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits LHBRX (indexed 16-bit load): template 0x7C00062C, dest at bit 21,
// src at bit 16, the index register offset at bit 11.
void PPCXEmitter::LHBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	Write32(0x7C00062C | (dest << 21) | (src << 16) | (offset << 11));
}
|
||||
|
||||
// Emits LWZ (32-bit load): template 0x80000000, dest at bit 21, base register src
// at bit 16, low 16 bits of offset as displacement.
void PPCXEmitter::LWZ (PPCReg dest, PPCReg src, int offset) {
	Write32(0x80000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits LWBRX (indexed 32-bit load): template 0x7C00042C, dest at bit 21,
// src at bit 16, the index register offset at bit 11.
void PPCXEmitter::LWBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	Write32(0x7C00042C | (dest << 21) | (src << 16) | (offset << 11));
}
|
||||
|
||||
// Emits STB (8-bit store): template 0x98000000. The first argument (named dest)
// is written at bit 21, the base register src at bit 16, and the low 16 bits of
// offset form the displacement.
void PPCXEmitter::STB (PPCReg dest, PPCReg src, int offset) {
	Write32(0x98000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits STBX (indexed 8-bit store): primary opcode 31, extended opcode 215,
// dest at bit 21, a at bit 16, b at bit 11.
void PPCXEmitter::STBX (PPCReg dest, PPCReg a, PPCReg b) {
	Write32((31<<26) | (dest << 21) | (a << 16) | (b << 11) | (215 << 1));
}
|
||||
|
||||
// Emits STH (16-bit store): template 0xB0000000, first argument at bit 21,
// base register src at bit 16, low 16 bits of offset as displacement.
void PPCXEmitter::STH (PPCReg dest, PPCReg src, int offset) {
	Write32(0xB0000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits STHBRX (indexed 16-bit store): template 0x7C00072C, first argument at
// bit 21, src at bit 16, the index register offset at bit 11.
void PPCXEmitter::STHBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	Write32(0x7C00072C | (dest << 21) | (src << 16) | (offset << 11));
}
|
||||
|
||||
// Emits STW (32-bit store): template 0x90000000, first argument at bit 21,
// base register src at bit 16, low 16 bits of offset as displacement.
void PPCXEmitter::STW (PPCReg dest, PPCReg src, int offset) {
	Write32(0x90000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits STWU (32-bit store with update): template 0x94000000, first argument at
// bit 21, base register src at bit 16, low 16 bits of offset as displacement.
void PPCXEmitter::STWU (PPCReg dest, PPCReg src, int offset) {
	Write32(0x94000000 | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Emits STWBRX (indexed 32-bit store): template 0x7C00052C, first argument at
// bit 21, src at bit 16, the index register offset at bit 11.
void PPCXEmitter::STWBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	Write32(0x7C00052C | (dest << 21) | (src << 16) | (offset << 11));
}
|
||||
|
||||
// Emits LD (64-bit load): primary opcode 58, dest at bit 21, base register src at
// bit 16, low 16 bits of offset as displacement.
// NOTE(review): the full 16-bit offset is OR-ed in unmasked; presumably callers
// only pass offsets that are multiples of 4 - confirm.
void PPCXEmitter::LD (PPCReg dest, PPCReg src, int offset) {
	const u32 primary = 58u << 26;
	Write32(primary | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
// Emits STD (64-bit store): primary opcode 62, first argument at bit 21, base
// register src at bit 16, low 16 bits of offset as displacement.
// NOTE(review): the full 16-bit offset is OR-ed in unmasked; presumably callers
// only pass offsets that are multiples of 4 - confirm.
void PPCXEmitter::STD (PPCReg dest, PPCReg src, int offset) {
	const u32 primary = 62u << 26;
	Write32(primary | (dest << 21) | (src << 16) | (offset & 0xffff));
}
|
||||
|
||||
// Branch operations
|
||||
void PPCXEmitter::B (const void *fnptr) {
|
||||
s32 func = (s32)fnptr - s32(code);
|
||||
u32 instr = (0x48000000 | ((s32)((func) & 0x3fffffc)));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BL(const void *fnptr) {
|
||||
s32 func = (s32)fnptr - s32(code);
|
||||
u32 instr = (0x48000001 | ((s32)((func) & 0x3fffffc)));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BA (const void *fnptr) {
|
||||
s32 func = (s32)fnptr;
|
||||
u32 instr = (0x48000002 | ((s32)((func) & 0x3fffffc)));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BLA (const void *fnptr) {
|
||||
s32 func = (s32)fnptr;
|
||||
u32 instr = (0x48000003 | ((s32)((func) & 0x3fffffc)));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
|
||||
// The conditional branch emitters below only encode a 16-bit signed byte
// displacement, so the target must lie within [-32768, 32767] bytes of the
// current emit position. Both macros expect `code` (emit pointer) and `fnptr`
// (branch target) to be in scope at the point of use.
//
// The previous definition compared an unsigned difference against -32767 on
// both sides, which was only true for one exact distance, and CHECK_SMALL_JUMP
// trapped on the *valid* case. The comparison is now signed, covers the whole
// encodable range, and the check traps when the jump is NOT encodable.
#define IS_SMALL_JUMP ((((s32)fnptr - (s32)code) >= -32768) && (((s32)fnptr - (s32)code) <= 32767))
#define CHECK_SMALL_JUMP { if(!IS_SMALL_JUMP) { DebugBreak(); } }
|
||||
|
||||
void PPCXEmitter::BEQ (const void *fnptr) {
|
||||
CHECK_SMALL_JUMP
|
||||
|
||||
s32 func = (s32)fnptr - s32(code);
|
||||
u32 instr = (0x41820000 | ( func & 0xfffc));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
|
||||
void PPCXEmitter::BGT(const void *fnptr) {
|
||||
CHECK_SMALL_JUMP
|
||||
|
||||
s32 func = (s32)fnptr - s32(code);
|
||||
u32 instr = (0x41810000 | (((s16)(((func)+1))) & 0xfffc));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
|
||||
void PPCXEmitter::BLTCTR() {
|
||||
Write32((19 << 26) | (12 << 21) | (528 <<1));
|
||||
// Break();
|
||||
}
|
||||
|
||||
void PPCXEmitter::BLT (const void *fnptr) {
|
||||
//CHECK_JUMP
|
||||
if (!IS_SMALL_JUMP) {
|
||||
u32 func_addr = (u32) fnptr;
|
||||
// Load func address
|
||||
MOVI2R(R0, func_addr);
|
||||
// Set it to link register
|
||||
MTCTR(R0);
|
||||
// Branch
|
||||
BLTCTR();
|
||||
return;
|
||||
}
|
||||
|
||||
s32 func = (s32)fnptr - s32(code);
|
||||
u32 instr = (0x41800000 | (((s16)(((func)+1))) & 0xfffc));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BLE (const void *fnptr) {
|
||||
CHECK_SMALL_JUMP
|
||||
|
||||
s32 func = (s32)fnptr - s32(code);
|
||||
u32 instr = (0x40810000 | (((s16)(((func)+1))) & 0xfffc));
|
||||
Write32(instr);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BCTRL() {
|
||||
Write32(0x4E800421);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BCTR() {
|
||||
Write32(0x4E800420);
|
||||
}
|
||||
|
||||
// Link Register
|
||||
// Emits MFLR: template word 0x7C0802A6 with register r at bit 21.
void PPCXEmitter::MFLR(PPCReg r) {
	const u32 instr = 0x7C0802A6 | (r << 21);
	Write32(instr);
}
|
||||
|
||||
// Emits MTLR: template word 0x7C0803A6 with register r at bit 21.
void PPCXEmitter::MTLR(PPCReg r) {
	const u32 instr = 0x7C0803A6 | (r << 21);
	Write32(instr);
}
|
||||
|
||||
// Emits MTCTR: template word 0x7C0903A6 with register r at bit 21.
void PPCXEmitter::MTCTR(PPCReg r) {
	const u32 instr = 0x7C0903A6 | (r << 21);
	Write32(instr);
}
|
||||
|
||||
void PPCXEmitter::BLR() {
|
||||
Write32(0x4E800020);
|
||||
}
|
||||
|
||||
void PPCXEmitter::BGTLR() {
|
||||
Write32(0x4D810020);
|
||||
}
|
||||
|
||||
// Fixup
|
||||
// Reserves one instruction slot (filled with a NOP) for a forward branch of
// type _B and returns the record SetJumpTarget() needs to patch it later.
FixupBranch PPCXEmitter::B()
{
	return B_Cond(_B);
}
|
||||
|
||||
// Reserves one instruction slot (filled with a NOP) for a forward branch of
// type _BL and returns the record SetJumpTarget() needs to patch it later.
FixupBranch PPCXEmitter::BL()
{
	return B_Cond(_BL);
}
|
||||
|
||||
|
||||
// Reserves one instruction slot (filled with a NOP) for a forward branch of
// type _BNE and returns the record SetJumpTarget() needs to patch it later.
FixupBranch PPCXEmitter::BNE() {
	return B_Cond(_BNE);
}
|
||||
|
||||
// Reserves one instruction slot (filled with a NOP) for a forward branch of
// type _BLT and returns the record SetJumpTarget() needs to patch it later.
FixupBranch PPCXEmitter::BLT() {
	return B_Cond(_BLT);
}
|
||||
|
||||
// Reserves one instruction slot (filled with a NOP) for a forward branch of
// type _BLE and returns the record SetJumpTarget() needs to patch it later.
FixupBranch PPCXEmitter::BLE() {
	return B_Cond(_BLE);
}
|
||||
|
||||
// Starts a forward branch of the requested type whose target is not known yet.
// The current emit position, the branch type and the emitter's `condition`
// member are recorded, and a NOP (0x60000000) is written as a placeholder that
// SetJumpTarget() later overwrites.
FixupBranch PPCXEmitter::B_Cond(FixupBranchType type) {
	FixupBranch fixup;
	// Record the slot address before Write32 advances the emit pointer.
	fixup.ptr = code;
	fixup.type = type;
	fixup.condition = condition;

	Write32(0x60000000);
	return fixup;
}
|
||||
|
||||
// Resolves a branch previously reserved with B()/BL()/BNE()/.../B_Cond():
// the placeholder at branch.ptr is overwritten with a branch to the current
// emit position. The distance is asserted to be within (-32767, 32767] for
// every branch type. An unknown branch type only triggers an assert; the
// placeholder is left untouched in that case.
void PPCXEmitter::SetJumpTarget(FixupBranch const &branch)
{
	const s32 distance = s32(code) - (s32)branch.ptr;
	_assert_msg_(DYNA_REC, distance > -32767 && distance <= 32767,
		"SetJumpTarget out of range (%p calls %p)", code, branch.ptr);

	u32 *const patchSite = (u32 *)branch.ptr;
	// Displacement field for the unconditional forms.
	const u32 longDisp = (u32)(distance & 0x3fffffc);
	// Displacement field for the conditional forms.
	const u32 shortDisp = (u32)(((s16)(distance + 1)) & 0xfffc);

	switch (branch.type) {
	case _B:   *patchSite = 0x48000000 | longDisp;  break;
	case _BL:  *patchSite = 0x48000001 | longDisp;  break;
	case _BEQ: *patchSite = 0x41820000 | shortDisp; break;
	case _BNE: *patchSite = 0x40820000 | shortDisp; break;
	case _BLT: *patchSite = 0x41800000 | shortDisp; break;
	case _BLE: *patchSite = 0x40810000 | shortDisp; break;
	case _BGT: *patchSite = 0x41810000 | shortDisp; break;
	case _BGE: *patchSite = 0x40800000 | shortDisp; break;
	default:
		// Error !!!
		_assert_msg_(DYNA_REC, 0, "SetJumpTarget unknow branch type: %d", branch.type);
		break;
	}
}
|
||||
|
||||
// Compare (Only use CR0 atm...)
|
||||
// Emits CMPI: primary opcode 11, register dest at bit 16, imm in the low
// half-word. The field above bit 21 is left as zero (CR0).
void PPCXEmitter::CMPI(PPCReg dest, unsigned short imm) {
	const u32 instr = (11<<26) | (dest << 16) | (imm & 0xffff);
	Write32(instr);
}
|
||||
|
||||
// Emits CMPLI: primary opcode 10, register dest at bit 16, imm in the low
// half-word. The field above bit 21 is left as zero (CR0).
void PPCXEmitter::CMPLI(PPCReg dest, unsigned short imm) {
	const u32 instr = (10<<26) | (dest << 16) | (imm & 0xffff);
	Write32(instr);
}
|
||||
|
||||
// Emits CMP: primary opcode 31 with a at bit 16 and b at bit 11; all other
// fields are zero (CR0).
void PPCXEmitter::CMP(PPCReg a, PPCReg b) {
	const u32 instr = (31 << 26) | (a << 16) | (b << 11);
	Write32(instr);
}
|
||||
// Emits CMPL: same layout as CMP (a at bit 16, b at bit 11) with bit 6 set.
void PPCXEmitter::CMPL(PPCReg a, PPCReg b) {
	const u32 instr = (31 << 26) | (a << 16) | (b << 11) | (1<<6);
	Write32(instr);
}
|
||||
|
||||
// Other operations
|
||||
// Emits ORI: template word 0x60000000 with src at bit 21, dest at bit 16 and
// the 16-bit immediate in the low half-word. MOVI2R uses it with
// src == dest to fill in the low half of a 32-bit constant.
void PPCXEmitter::ORI(PPCReg src, PPCReg dest, unsigned short imm) {
	Write32(0x60000000 | (src << 21) | (dest << 16) | (imm & 0xffff));
}
|
||||
|
||||
// Emits OR: template word 0x7C000378. Rd is written at bit 16, Ra at bit 21
// and Rb at bit 11.
void PPCXEmitter::OR(PPCReg Rd, PPCReg Ra, PPCReg Rb) {
	Write32(0x7C000378 | (Ra << 21) | (Rd << 16) | (Rb << 11));
}
|
||||
|
||||
// Emits XOR: template word 0x7C000278. Rd is written at bit 16, Ra at bit 21
// and Rb at bit 11.
void PPCXEmitter::XOR(PPCReg Rd, PPCReg Ra, PPCReg Rb) {
	Write32(0x7C000278 | (Ra << 21) | (Rd << 16) | (Rb << 11));
}
|
||||
|
||||
// Emits SUBF: template word 0x7C000050 with Rd at bit 21, Ra at bit 16 and
// Rb at bit 11. Only the lowest bit of RCFlags is used; it is copied into
// bit 0 of the instruction word.
void PPCXEmitter::SUBF(PPCReg Rd, PPCReg Ra, PPCReg Rb, int RCFlags) {
	const int recordBit = RCFlags & 1;
	Write32(0x7C000050 | (Rd << 21) | (Ra << 16) | (Rb << 11) | recordBit);
}
|
||||
|
||||
// Quick Call
|
||||
// dest = LIS(imm) + ORI(+imm)
|
||||
// Loads the 32-bit constant imm into dest using as few instructions as possible.
//
//  - imm < 0x8000: a single LI.
//  - otherwise:    LIS with the upper half-word, followed by ORI with the lower
//                  half-word when that half is non-zero.
//
// LI is ADDI with a zero base register, and ADDI sign-extends its 16-bit
// immediate. Values 0x8000..0xFFFF therefore must NOT take the single-LI path
// (they would load as 0xFFFF8000..0xFFFFFFFF); they are built with LIS 0 + ORI,
// because ORI does not sign-extend its immediate.
void PPCXEmitter::MOVI2R(PPCReg dest, unsigned int imm) {
	const unsigned short lo = (unsigned short)(imm & 0xFFFF);
	const unsigned short hi = (unsigned short)(imm >> 16);

	if (imm < 0x8000) {
		// Fits in a positive signed 16-bit immediate.
		LI(dest, lo);
		return;
	}

	// HI 16bit
	LIS(dest, hi);
	if (lo != 0) {
		// LO 16bit
		ORI(dest, dest, lo);
	}
}
|
||||
|
||||
// Emits a call to func through the count register:
// MOVI2R loads the address into R0, MTCTR copies it to CTR, BCTRL branches.
// R0 and CTR are overwritten by the emitted sequence.
void PPCXEmitter::QuickCallFunction(void *func) {
	/** TODO : can use simple jump **/

	// Materialize the target address in R0.
	MOVI2R(R0, (u32) func);
	// Move it into the count register.
	MTCTR(R0);
	// Branch to CTR.
	BCTRL();
}
|
||||
|
||||
// sign
|
||||
// Emits EXTSB: template word 0x7C000774 with src at bit 21 and dest at bit 16.
void PPCXEmitter::EXTSB (PPCReg dest, PPCReg src) {
	const u32 instr = 0x7C000774 | (src << 21) | (dest << 16);
	Write32(instr);
}
|
||||
|
||||
// Emits EXTSH: template word 0x7C000734 with src at bit 21 and dest at bit 16.
void PPCXEmitter::EXTSH (PPCReg dest, PPCReg src) {
	const u32 instr = 0x7C000734 | (src << 21) | (dest << 16);
	Write32(instr);
}
|
||||
|
||||
// Emits RLWINM: primary opcode 21, src at bit 21, dest at bit 16, shift at
// bit 11, start at bit 6 and end at bit 1. The arguments are not range-checked.
void PPCXEmitter::RLWINM (PPCReg dest, PPCReg src, int shift, int start, int end) {
	const u32 instr = (21<<26) | (src << 21) | (dest << 16) | (shift << 11) | (start << 6) | (end << 1);
	Write32(instr);
}
|
||||
|
||||
// Prologue / epilogue
|
||||
|
||||
void PPCXEmitter::Prologue() {
|
||||
// Save regs
|
||||
u32 regSize = 8; // 4 in 32bit system
|
||||
u32 stackFrameSize = 32*32;//(35 - 12) * regSize;
|
||||
|
||||
// Write Prologue (setup stack frame etc ...)
|
||||
// Save Lr
|
||||
MFLR(R12);
|
||||
|
||||
for(int i = 14; i < 32; i ++) {
|
||||
STD((PPCReg)i, R1, -((33 - i) * regSize));
|
||||
}
|
||||
|
||||
// Save r12
|
||||
STW(R12, R1, -0x8);
|
||||
|
||||
// allocate stack
|
||||
STWU(R1, R1, -stackFrameSize);
|
||||
}
|
||||
|
||||
void PPCXEmitter::Epilogue() {
|
||||
u32 regSize = 8; // 4 in 32bit system
|
||||
u32 stackFrameSize = 32*32;//(35 - 12) * regSize;
|
||||
|
||||
// Write Epilogue (restore stack frame, return)
|
||||
// free stack
|
||||
ADDI(R1, R1, stackFrameSize);
|
||||
|
||||
// Restore regs
|
||||
for(int i = 14; i < 32; i ++) {
|
||||
LD((PPCReg)i, R1, -((33 - i) * regSize));
|
||||
}
|
||||
|
||||
// recover r12 (LR saved register)
|
||||
LWZ (R12, R1, -0x8);
|
||||
|
||||
// Restore Lr
|
||||
MTLR(R12);
|
||||
}
|
||||
|
||||
// Others ...
|
||||
// Points the emitter at ptr: the write position, the recorded start of the
// code and the "already flushed up to here" marker are all reset to ptr.
void PPCXEmitter::SetCodePtr(u8 *ptr)
{
	startcode = ptr;
	code = ptr;
	lastCacheFlushEnd = ptr;
}
|
||||
|
||||
// Returns the current write position as a read-only pointer.
const u8 *PPCXEmitter::GetCodePtr() const
{
	const u8 *current = code;
	return current;
}
|
||||
|
||||
// Returns the current write position as a writable pointer.
u8 *PPCXEmitter::GetWritableCodePtr()
{
	u8 *current = code;
	return current;
}
|
||||
|
||||
// Fills the next bytes/4 instruction slots with NOPs (0x60000000).
// Any remainder of bytes that is not a whole word is ignored.
void PPCXEmitter::ReserveCodeSpace(u32 bytes)
{
	u32 remaining = bytes / 4;
	while (remaining > 0) {
		Write32(0x60000000); //nop
		--remaining;
	}
}
|
||||
|
||||
// Pads with NOPs up to the next 16-byte boundary and returns the new write
// position. The padding is whatever ReserveCodeSpace emits for the byte count
// needed to reach the boundary.
const u8 *PPCXEmitter::AlignCode16()
{
	const u32 padding = (-(s32)code) & 15;
	ReserveCodeSpace(padding);
	return code;
}
|
||||
|
||||
// Pads with NOPs up to the next 4096-byte boundary and returns the new write
// position.
const u8 *PPCXEmitter::AlignCodePage()
{
	const u32 padding = (-(s32)code) & 4095;
	ReserveCodeSpace(padding);
	return code;
}
|
||||
|
||||
void PPCXEmitter::FlushIcache()
|
||||
{
|
||||
FlushIcacheSection(lastCacheFlushEnd, code);
|
||||
lastCacheFlushEnd = code;
|
||||
}
|
||||
|
||||
// Walks [start, end) in 4-byte steps issuing dcbst and icbi for each address,
// then emits sync and isync once the whole range has been processed.
void PPCXEmitter::FlushIcacheSection(u8 *start, u8 *end)
{
	for (u8 *addr = start; addr < end; addr += 4) {
		__asm dcbst r0, addr
		__asm icbi r0, addr
	}
	__emit(0x7c0004ac);//sync
	__emit(0x4C00012C);//isync
}
|
||||
|
||||
|
||||
} // namespace
|
381
Common/ppcEmitter.h
Normal file
381
Common/ppcEmitter.h
Normal file
|
@ -0,0 +1,381 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
|
||||
|
||||
|
||||
// http://www.csd.uwo.ca/~mburrel/stuff/ppc-asm.html
|
||||
// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.aixassem/doc/alangref/linkage_convent.htm
|
||||
// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.aixassem/doc/alangref/instruction_set.htm
|
||||
|
||||
#ifndef _DOLPHIN_PPC_CODEGEN_
|
||||
#define _DOLPHIN_PPC_CODEGEN_
|
||||
|
||||
#include "Common.h"
|
||||
#include "MemoryUtil.h"
|
||||
#include <vector>
|
||||
|
||||
#undef _IP
|
||||
#undef R0
|
||||
#undef _SP
|
||||
#undef _LR
|
||||
#undef _PC
|
||||
#undef CALL
|
||||
|
||||
namespace PpcGen
|
||||
{
|
||||
// Register identifiers used by the emitter.
// NOTE(review): CR0 = 0 restarts the implicit numbering, so FPR0 has the value 1
// (not 0), VR0 follows FPR31, and the special registers follow VR13. Confirm
// before shifting FPR/VR enumerators directly into instruction fields.
enum PPCReg
{
	// ---- GPRs (32), numbered 0..31 ----
	R0 = 0,						// Behaves as zero does in some instructions
	R1,							// Stack pointer (SP)
	R2,							// Reserved
	R3, R4,						// Integer function parameters and return values
	R5, R6, R7, R8, R9, R10,	// Integer function parameters
	R11,						// General purpose
	R12,						// Scratch
	R13,						// Unused by the compiler, reserved
	// General purpose
	R14, R15, R16, R17, R18, R19,
	R20, R21, R22, R23, R24, R25,
	R26, R27, R28, R29, R30, R31,

	// ---- CRs (7) ----
	CR0 = 0,

	// ---- FPRs (32) ----
	FPR0,						// Scratch
	// Double word function parameters and return values
	FPR1, FPR2, FPR3, FPR4,
	FPR5, FPR6, FPR7, FPR8,
	FPR9, FPR10, FPR11, FPR12,
	FPR13,
	// General purpose
	FPR14, FPR15, FPR16, FPR17,
	FPR18, FPR19, FPR20, FPR21,
	FPR22, FPR23, FPR24, FPR25,
	FPR26, FPR27, FPR28, FPR29,
	FPR30, FPR31,

	// ---- Vmx (128) - only the first 14 are listed ----
	VR0,
	// Vector function parameters and return values
	VR1, VR2, VR3, VR4,
	VR5, VR6, VR7, VR8,
	VR9, VR10, VR11, VR12,
	VR13, // ...

	// ---- Other registers ----
	LR, CTR, XER, FPSCR,

	// End marker
	INVALID_REG = 0xFFFFFFFF
};
|
||||
// Integer operand widths, numbered 0..3 from 8-bit to 64-bit.
enum IntegerSize
{
	I_I8 = 0,	// 8-bit
	I_I16,		// 16-bit
	I_I32,		// 32-bit
	I_I64		// 64-bit
};
|
||||
|
||||
// NOTE(review): PPCReg lists 32 GPRs (R0..R31) while this constant is 31 -
// confirm whether one register is deliberately excluded.
enum { NUMGPRs = 31 };
|
||||
|
||||
typedef const u8* JumpTarget;
|
||||
|
||||
|
||||
// Kinds of branch that can be reserved first and patched later by
// PPCXEmitter::SetJumpTarget(). Values are implicit: _B is 0 and _BL is 7.
enum FixupBranchType {
	_B,		// unconditional
	_BEQ,
	_BNE,
	_BLT,
	_BLE,
	_BGT,
	_BGE,
	_BL		// unconditional, link register variant
};
|
||||
|
||||
struct FixupBranch
|
||||
{
|
||||
u8 *ptr;
|
||||
u32 condition; // Remembers our codition at the time
|
||||
FixupBranchType type; //0 = B 1 = BL
|
||||
};
|
||||
|
||||
class PPCXEmitter
|
||||
{
|
||||
private:
|
||||
u8 *code, *startcode;
|
||||
u8 *lastCacheFlushEnd;
|
||||
u32 condition;
|
||||
|
||||
protected:
|
||||
// Write opcode
|
||||
inline void Write32(u32 value) {*(u32*)code = value; code+=4;}
|
||||
|
||||
public:
|
||||
PPCXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
|
||||
}
|
||||
PPCXEmitter(u8 *code_ptr) {
|
||||
code = code_ptr;
|
||||
lastCacheFlushEnd = code_ptr;
|
||||
startcode = code_ptr;
|
||||
}
|
||||
virtual ~PPCXEmitter() {}
|
||||
|
||||
void SetCodePtr(u8 *ptr);
|
||||
void ReserveCodeSpace(u32 bytes);
|
||||
const u8 *AlignCode16();
|
||||
const u8 *AlignCodePage();
|
||||
const u8 *GetCodePtr() const;
|
||||
void FlushIcache();
|
||||
void FlushIcacheSection(u8 *start, u8 *end);
|
||||
u8 *GetWritableCodePtr();
|
||||
|
||||
|
||||
// Special purpose instructions
|
||||
|
||||
// Debug Breakpoint
|
||||
void BKPT(u16 arg);
|
||||
|
||||
// Hint instruction
|
||||
void YIELD();
|
||||
|
||||
// Do nothing
|
||||
void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)
|
||||
|
||||
// FixupBranch ops
|
||||
FixupBranch B();
|
||||
FixupBranch BL();
|
||||
FixupBranch BNE();
|
||||
FixupBranch BLT();
|
||||
FixupBranch BLE();
|
||||
|
||||
FixupBranch B_Cond(FixupBranchType type);
|
||||
|
||||
void SetJumpTarget(FixupBranch const &branch);
|
||||
|
||||
// Branch ops
|
||||
void B (const void *fnptr);
|
||||
void BL(const void *fnptr);
|
||||
void BA (const void *fnptr);
|
||||
void BLA(const void *fnptr);
|
||||
void BEQ(const void *fnptr);
|
||||
void BLE(const void *fnptr);
|
||||
void BLT(const void *fnptr);
|
||||
void BGT(const void *fnptr);
|
||||
void BEQ (PPCReg r);
|
||||
|
||||
void BLR();
|
||||
void BGTLR(); // ??? used ?
|
||||
void BLTCTR();
|
||||
void BGTCTR();
|
||||
void BLECTR();
|
||||
void BGECTR();
|
||||
void BCTRL ();
|
||||
void BCTR();
|
||||
|
||||
// Link Register
|
||||
void MFLR(PPCReg r);
|
||||
void MTLR(PPCReg r);
|
||||
void MTCTR(PPCReg r);
|
||||
|
||||
|
||||
// Logical Ops
|
||||
void AND (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
void ANDI (PPCReg Rs, PPCReg Ra, unsigned short imm);
|
||||
void ANDIS(PPCReg Rs, PPCReg Ra, unsigned short imm);
|
||||
void NAND (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
void OR (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
void ORC (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
void NOR (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
void XOR (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
void NEG (PPCReg Rs, PPCReg Ra, PPCReg Rb);
|
||||
|
||||
// Arithmetics ops
|
||||
void ADD (PPCReg Rd, PPCReg Ra, PPCReg Rb);
|
||||
void ADDI (PPCReg Rd, PPCReg Ra, short imm);
|
||||
void ADDIS (PPCReg Rd, PPCReg Ra, short imm);
|
||||
void ADDC (PPCReg Rd, PPCReg Ra, PPCReg Rb);
|
||||
void SUB (PPCReg Rd, PPCReg Ra, PPCReg Rb) {
|
||||
// reverse ?
|
||||
SUBF(Rd, Rb, Ra);
|
||||
}
|
||||
// if RCFlags update CR0
|
||||
void SUBF (PPCReg Rd, PPCReg Ra, PPCReg Rb, int RCFlags = 0);
|
||||
void SUBFC (PPCReg Rd, PPCReg Ra, PPCReg Rb);
|
||||
|
||||
// Floating ops
|
||||
void DIVW(PPCReg dest, PPCReg dividend, PPCReg divisor);
|
||||
void DIVWU(PPCReg dest, PPCReg dividend, PPCReg divisor);
|
||||
void MULLW(PPCReg dest, PPCReg src, PPCReg op2);
|
||||
void MULHW (PPCReg dest, PPCReg src, PPCReg op2);
|
||||
void MULHWS(PPCReg dest, PPCReg src, PPCReg op2);
|
||||
|
||||
void ORI (PPCReg src, PPCReg dest, unsigned short imm);
|
||||
|
||||
// Memory load/store operations
|
||||
void LI (PPCReg dest, unsigned short imm);
|
||||
void LIS (PPCReg dest, unsigned short imm);
|
||||
// dest = LIS(imm) + ORI(+imm)
|
||||
void MOVI2R (PPCReg dest, unsigned int imm);
|
||||
|
||||
// 8bit
|
||||
void LBZ (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void LBZX (PPCReg dest, PPCReg a, PPCReg b);
|
||||
|
||||
// 16bit
|
||||
void LHZ (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void LHBRX (PPCReg dest, PPCReg src, PPCReg offset);
|
||||
// 32 bit
|
||||
void LWZ (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void LWBRX (PPCReg dest, PPCReg src, PPCReg offset);
|
||||
// 64 bit
|
||||
void LD (PPCReg dest, PPCReg src, int offset = 0);
|
||||
|
||||
// 8 bit
|
||||
void STB (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void STBX (PPCReg dest, PPCReg a, PPCReg b);
|
||||
// 16 bit
|
||||
void STH (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void STHBRX (PPCReg dest, PPCReg src, PPCReg offset);
|
||||
// 32 bit
|
||||
void STW (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void STWU (PPCReg dest, PPCReg src, int offset = 0);
|
||||
void STWBRX (PPCReg dest, PPCReg src, PPCReg offset);
|
||||
// 64 bit
|
||||
void STD (PPCReg dest, PPCReg src, int offset = 0);
|
||||
|
||||
// sign
|
||||
void EXTSB (PPCReg dest, PPCReg src);
|
||||
void EXTSH (PPCReg dest, PPCReg src);
|
||||
|
||||
void RLWINM (PPCReg dest, PPCReg src, int shift, int start, int end);
|
||||
|
||||
// Compare
|
||||
void CMPLI (PPCReg dest, unsigned short imm);
|
||||
void CMPI (PPCReg dest, unsigned short imm);
|
||||
void CMPL (PPCReg a, PPCReg b);
|
||||
void CMP (PPCReg a, PPCReg b);
|
||||
|
||||
void Prologue();
|
||||
void Epilogue();
|
||||
|
||||
// Debug !
|
||||
void Break() {
|
||||
Write32(0x0FE00016);
|
||||
}
|
||||
|
||||
void MR (PPCReg to, PPCReg from) {
|
||||
OR(to, from, from);
|
||||
}
|
||||
|
||||
void QuickCallFunction(void *func);
|
||||
protected:
|
||||
|
||||
}; // class PPCXEmitter
|
||||
|
||||
|
||||
// You get memory management for free, plus, you can use all the MOV etc functions without
|
||||
// having to prefix them with gen-> or something similar.
|
||||
class PPCXCodeBlock : public PPCXEmitter
|
||||
{
|
||||
protected:
|
||||
u8 *region;
|
||||
size_t region_size;
|
||||
|
||||
public:
|
||||
PPCXCodeBlock() : region(NULL), region_size(0) {}
|
||||
virtual ~PPCXCodeBlock() { if (region) FreeCodeSpace(); }
|
||||
|
||||
// Call this before you generate any code.
|
||||
void AllocCodeSpace(int size)
|
||||
{
|
||||
region_size = size;
|
||||
region = (u8*)AllocateExecutableMemory(region_size);
|
||||
SetCodePtr(region);
|
||||
}
|
||||
|
||||
// Always clear code space with breakpoints, so that if someone accidentally executes
|
||||
// uninitialized, it just breaks into the debugger.
|
||||
void ClearCodeSpace()
|
||||
{
|
||||
// x86/64: 0xCC = breakpoint
|
||||
memset(region, 0xCC, region_size);
|
||||
ResetCodePtr();
|
||||
}
|
||||
|
||||
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
|
||||
void FreeCodeSpace()
|
||||
{
|
||||
region = NULL;
|
||||
region_size = 0;
|
||||
}
|
||||
|
||||
bool IsInSpace(u8 *ptr)
|
||||
{
|
||||
return ptr >= region && ptr < region + region_size;
|
||||
}
|
||||
|
||||
// Cannot currently be undone. Will write protect the entire code region.
|
||||
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
|
||||
void WriteProtect()
|
||||
{
|
||||
//WriteProtectMemory(region, region_size, true);
|
||||
}
|
||||
void UnWriteProtect()
|
||||
{
|
||||
//UnWriteProtectMemory(region, region_size, false);
|
||||
}
|
||||
|
||||
void ResetCodePtr()
|
||||
{
|
||||
SetCodePtr(region);
|
||||
}
|
||||
|
||||
size_t GetSpaceLeft() const
|
||||
{
|
||||
return region_size - (GetCodePtr() - region);
|
||||
}
|
||||
|
||||
u8 *GetBasePtr() {
|
||||
return region;
|
||||
}
|
||||
|
||||
size_t GetOffset(u8 *ptr) {
|
||||
return ptr - region;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // _DOLPHIN_PPC_CODEGEN_
|
|
@ -95,17 +95,19 @@ void Core_WaitInactive(int milliseconds)
|
|||
void UpdateScreenScale() {
|
||||
dp_xres = PSP_CoreParameter().pixelWidth;
|
||||
dp_yres = PSP_CoreParameter().pixelHeight;
|
||||
pixel_xres = PSP_CoreParameter().pixelWidth;
|
||||
pixel_yres = PSP_CoreParameter().pixelHeight;
|
||||
g_dpi = 72;
|
||||
g_dpi_scale = 1.0f;
|
||||
#ifdef _WIN32
|
||||
if (g_Config.iWindowZoom == 1)
|
||||
{
|
||||
dp_xres *= 2;
|
||||
dp_yres *= 2;
|
||||
g_dpi_scale = 2.0f;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
pixel_xres = PSP_CoreParameter().pixelWidth;
|
||||
pixel_yres = PSP_CoreParameter().pixelHeight;
|
||||
g_dpi = 72;
|
||||
g_dpi_scale = 1.0f;
|
||||
pixel_in_dps = (float)pixel_xres / dp_xres;
|
||||
}
|
||||
|
||||
|
|
|
@ -341,9 +341,10 @@
|
|||
<ClCompile Include="Util\BlockAllocator.cpp" />
|
||||
<ClCompile Include="Util\PPGeDraw.cpp" />
|
||||
<ClCompile Include="Util\ppge_atlas.cpp" />
|
||||
<ClCompile Include="..\ext\xxhash.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Ext\disarm.h" />
|
||||
<ClInclude Include="..\ext\disarm.h" />
|
||||
<ClInclude Include="..\ext\snappy\snappy-internal.h" />
|
||||
<ClInclude Include="..\ext\snappy\snappy-sinksource.h" />
|
||||
<ClInclude Include="..\ext\snappy\snappy-stubs-internal.h" />
|
||||
|
@ -494,6 +495,7 @@
|
|||
<ClInclude Include="Util\BlockAllocator.h" />
|
||||
<ClInclude Include="Util\PPGeDraw.h" />
|
||||
<ClInclude Include="Util\ppge_atlas.h" />
|
||||
<ClInclude Include="..\ext\xxhash.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\android\jni\Android.mk" />
|
||||
|
|
|
@ -457,6 +457,9 @@
|
|||
<ClCompile Include="MIPS\MIPSStackWalk.cpp">
|
||||
<Filter>MIPS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\ext\xxhash.c">
|
||||
<Filter>Ext</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ELF\ElfReader.h">
|
||||
|
@ -777,9 +780,6 @@
|
|||
<ClInclude Include="MIPS\ARM\ArmRegCache.h">
|
||||
<Filter>MIPS\ARM</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Ext\disarm.h">
|
||||
<Filter>Ext</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MIPS\x86\RegCacheFPU.h">
|
||||
<Filter>MIPS\x86</Filter>
|
||||
</ClInclude>
|
||||
|
@ -852,6 +852,12 @@
|
|||
<ClInclude Include="MIPS\MIPSStackWalk.h">
|
||||
<Filter>MIPS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\ext\disarm.h">
|
||||
<Filter>Ext</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\ext\xxhash.h">
|
||||
<Filter>Ext</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="CMakeLists.txt" />
|
||||
|
|
|
@ -29,6 +29,7 @@ enum GPUCore {
|
|||
GPU_NULL,
|
||||
GPU_GLES,
|
||||
GPU_SOFTWARE,
|
||||
GPU_DIRECTX9,
|
||||
};
|
||||
|
||||
struct CoreParameter
|
||||
|
|
|
@ -48,6 +48,10 @@ using namespace ArmGen;
|
|||
#include "Common/x64Analyzer.h"
|
||||
#include "Core/MIPS/x86/Asm.h"
|
||||
using namespace Gen;
|
||||
#elif defined(PPC)
|
||||
#include "Common/ppcEmitter.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
using namespace PpcGen;
|
||||
#else
|
||||
#error "Unsupported arch!"
|
||||
#endif
|
||||
|
@ -274,6 +278,10 @@ void JitBlockCache::LinkBlockExits(int i)
|
|||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
XEmitter emit(b.exitPtrs[e]);
|
||||
emit.JMP(blocks[destinationBlock].checkedEntry, true);
|
||||
#elif defined(PPC)
|
||||
PPCXEmitter emit(b.exitPtrs[e]);
|
||||
emit.B(blocks[destinationBlock].checkedEntry);
|
||||
emit.FlushIcache();
|
||||
#endif
|
||||
b.linkStatus[e] = true;
|
||||
}
|
||||
|
@ -356,6 +364,12 @@ void JitBlockCache::DestroyBlock(int block_num, bool invalidate)
|
|||
XEmitter emit((u8 *)b.checkedEntry);
|
||||
emit.MOV(32, M(&mips->pc), Imm32(b.originalAddress));
|
||||
emit.JMP(MIPSComp::jit->Asm().dispatcher, true);
|
||||
#elif defined(PPC)
|
||||
PPCXEmitter emit((u8 *)b.checkedEntry);
|
||||
emit.MOVI2R(R3, b.originalAddress);
|
||||
emit.STW(R0, CTXREG, offsetof(MIPSState, pc));
|
||||
emit.B(MIPSComp::jit->dispatcher);
|
||||
emit.FlushIcache();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,11 @@ typedef ArmGen::ARMXCodeBlock CodeBlock;
|
|||
namespace Gen { class XEmitter; }
|
||||
using namespace Gen;
|
||||
typedef Gen::XCodeBlock CodeBlock;
|
||||
#elif defined(PPC)
|
||||
#include "Common/ppcEmitter.h"
|
||||
namespace PpcGen { class PPCXEmitter; }
|
||||
using namespace PpcGen;
|
||||
typedef PpcGen::PPCXCodeBlock CodeBlock;
|
||||
#else
|
||||
#error "Unsupported arch!"
|
||||
#endif
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
|
||||
#include "Common/Common.h"
|
||||
|
||||
#if defined(ARM)
|
||||
#if defined(PPC)
|
||||
#include "../PPC/PpcJit.h"
|
||||
#elif defined(ARM)
|
||||
#include "../ARM/ArmJit.h"
|
||||
#else
|
||||
#include "../x86/Jit.h"
|
||||
|
|
285
Core/MIPS/PPC/PpcAsm.cpp
Normal file
285
Core/MIPS/PPC/PpcAsm.cpp
Normal file
|
@ -0,0 +1,285 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
#include <ppcintrinsics.h>
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
extern volatile CoreState coreState;
|
||||
|
||||
static void JitAt()
|
||||
{
|
||||
MIPSComp::jit->Compile(currentMIPS->pc);
|
||||
}
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
static int dontLogBlocks = 20;
|
||||
static int logBlocks = 40;
|
||||
|
||||
// Compiles one block of MIPS code into host code and records its entry
// points and sizes in *b.
//
// em_address: emulated address of the block (only consulted by the optional
//             LOGASM disassembly; compilation itself starts at mips_->pc).
// b:          block descriptor to fill in (checkedEntry, normalEntry,
//             codeSize, originalSize).
// Returns b->normalEntry, i.e. the entry point that skips the downcount check.
const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
	// Reset the per-block compiler state.
	js.cancel = false;
	js.blockStart = js.compilerPC = mips_->pc;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.PrefixStart();

	// We add a check before the block, used when entering from a linked block.
	b->checkedEntry = GetCodePtr();
	// Downcount flag check. The last block decremented downcounter, and the flag should still be available.

	// Load the block start into SREG, then leave through outerLoopPCInR0
	// (which stores SREG to mips->pc) if the downcount has gone negative.
	MOVI2R(SREG, js.blockStart);

	// if (currentMIPS->downcount<0)
	CMPI(DCNTREG, 0);
	BLT((const void *)outerLoopPCInR0);

	b->normalEntry = GetCodePtr();
	// TODO: this needs work
	// NOTE(review): 'analysis' is handed to the register cache without being
	// filled in by MIPSAnalyst::Analyze - confirm gpr.Start() does not rely on it.
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	//fpr.Start(analysis);

	int numInstructions = 0;
	if (logBlocks > 0) logBlocks--;
	if (dontLogBlocks > 0) dontLogBlocks--;

// #define LOGASM
#ifdef LOGASM
	char temp[256];
#endif
	// Compile instruction by instruction until one of the Comp_* handlers
	// clears js.compiling (branches, jumps, syscalls...).
	while (js.compiling)
	{
		gpr.SetCompilerPC(js.compilerPC);  // Let it know for log messages
		//fpr.SetCompilerPC(js.compilerPC);
		u32 inst = Memory::Read_Instruction(js.compilerPC);
		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst);

		js.compilerPC += 4;
		numInstructions++;
	}
	//FlushLitPool();
#ifdef LOGASM
	if (logBlocks > 0 && dontLogBlocks == 0) {
		for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true);
			INFO_LOG(DYNA_REC, "M:   %08x   %s", cpc, temp);
		}
	}
#endif

	b->codeSize = GetCodePtr() - b->normalEntry;

#ifdef LOGASM
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(DYNA_REC, "=============== PPC ===============");
		// NOTE(review): still calls the ARM disassembler on PPC output;
		// replace with a PPC disassembler before enabling LOGASM.
		DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
#endif
	//DumpJit();

	AlignCode16();

	// Don't forget to zap the instruction cache!
	FlushIcache();

	// originalSize is counted in MIPS instructions, not bytes.
	b->originalSize = numInstructions;
	return b->normalEntry;
}
|
||||
|
||||
// Debug helper: on _XBOX builds, writes everything emitted so far
// (GetBasePtr() up to GetCodePtr()) to "game:\jit.bin". No-op elsewhere.
void Jit::DumpJit() {
#ifdef _XBOX
	u32 len = (u32)GetCodePtr() - (u32)GetBasePtr();
	FILE *fd = fopen("game:\\jit.bin", "wb");
	// The dump is best-effort: if the file cannot be created just skip it
	// instead of handing a NULL stream to fwrite/fclose.
	if (fd == NULL)
		return;
	fwrite(GetBasePtr(), len, 1, fd);
	fclose(fd);
#endif
}
|
||||
|
||||
void Jit::GenerateFixedCode() {
|
||||
enterCode = AlignCode16();
|
||||
|
||||
INFO_LOG(HLE, "Base: %08x", (u32)Memory::base);
|
||||
INFO_LOG(HLE, "enterCode: 0x%08p", enterCode);
|
||||
INFO_LOG(HLE, "GetBasePtr: 0x%08p", GetBasePtr());
|
||||
|
||||
Prologue();
|
||||
|
||||
// Map fixed register
|
||||
MOVI2R(BASEREG, (u32)Memory::base);
|
||||
MOVI2R(CTXREG, (u32)mips_);
|
||||
MOVI2R(CODEREG, (u32)GetBasePtr());
|
||||
|
||||
// Update downcount reg value from memory
|
||||
RestoreDowncount(DCNTREG);
|
||||
|
||||
// SREG = mips->pc
|
||||
MovFromPC(SREG);
|
||||
|
||||
// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInR3 ??
|
||||
outerLoopPCInR0 = GetCodePtr();
|
||||
|
||||
// mips->pc = SREG
|
||||
MovToPC(SREG);
|
||||
|
||||
// Keep current location
|
||||
outerLoop = GetCodePtr();
|
||||
|
||||
// Jit loop
|
||||
// {
|
||||
// Save downcount reg value to memory
|
||||
SaveDowncount(DCNTREG);
|
||||
// Call CoreTiming::Advance() => update donwcount
|
||||
QuickCallFunction((void *)&CoreTiming::Advance);
|
||||
// Update downcount reg value from memory
|
||||
RestoreDowncount(DCNTREG);
|
||||
|
||||
// branch to skipToRealDispatch
|
||||
FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
|
||||
|
||||
// Keep current location dispatcherCheckCoreState:
|
||||
dispatcherCheckCoreState = GetCodePtr();
|
||||
|
||||
// The result of slice decrementation should be in flags if somebody jumped here
|
||||
// IMPORTANT - We jump on negative, not carry!!!
|
||||
// branch to bailCoreState: (jump if(what ??) negative )
|
||||
FixupBranch bailCoreState = BLT(); // BLT ???
|
||||
|
||||
// SREG = coreState
|
||||
MOVI2R(SREG, (u32)&coreState);
|
||||
// Compare coreState and CORE_RUNNING
|
||||
LWZ(SREG, SREG); // SREG = *SREG
|
||||
CMPI(SREG, 0); // compare 0(CORE_RUNNING) and CR0
|
||||
|
||||
// branch to badCoreState: (jump if coreState != CORE_RUNNING)
|
||||
FixupBranch badCoreState = BNE();
|
||||
|
||||
// branch to skipToRealDispatch2:
|
||||
FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time
|
||||
|
||||
// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInSREG ??
|
||||
dispatcherPCInR0 = GetCodePtr();
|
||||
|
||||
// mips->pc = SREG
|
||||
MovToPC(SREG);
|
||||
|
||||
// At this point : flags = EQ. Fine for the next check, no need to jump over it.
|
||||
// label dispatcher:
|
||||
dispatcher = GetCodePtr();
|
||||
|
||||
// {
|
||||
// The result of slice decrementation should be in flags if somebody jumped here
|
||||
// IMPORTANT - We jump on negative, not carry!!!
|
||||
// label bail:
|
||||
// arm B_CC(CC_MI);
|
||||
FixupBranch bail = BLT();
|
||||
|
||||
// label skipToRealDispatch:
|
||||
SetJumpTarget(skipToRealDispatch);
|
||||
|
||||
// label skipToRealDispatch2:
|
||||
SetJumpTarget(skipToRealDispatch2);
|
||||
|
||||
// Keep current location
|
||||
dispatcherNoCheck = GetCodePtr();
|
||||
|
||||
// read op
|
||||
// R3 = mips->pc & Memory::MEMVIEW32_MASK
|
||||
LWZ(R3, CTXREG, offsetof(MIPSState, pc));
|
||||
// & Memory::MEMVIEW32_MASK
|
||||
RLWINM(R3, R3, 0, 2, 31);
|
||||
|
||||
// R3 = memory::base[r3];
|
||||
ADD(R3, BASEREG, R3);
|
||||
MOVI2R(R0, 0);
|
||||
LWBRX(R3, R3, R0);
|
||||
|
||||
// R4 = R3 & MIPS_EMUHACK_VALUE_MASK
|
||||
RLWINM(R4, R3, 0, 6, 31);
|
||||
|
||||
// R3 = R3 & MIPS_EMUHACK_MASK
|
||||
RLWINM(R3, R3, 0, 0, 6);
|
||||
|
||||
// compare, op == MIPS_EMUHACK_OPCODE
|
||||
MOVI2R(SREG, MIPS_EMUHACK_OPCODE);
|
||||
CMPL(R3, SREG);
|
||||
|
||||
// Branch if func block not found
|
||||
FixupBranch notfound = BNE();
|
||||
|
||||
// {
|
||||
// R3 = R4 + GetBasePtr()
|
||||
ADD(R3, R4, CODEREG);
|
||||
|
||||
MTCTR(R3);
|
||||
BCTR();
|
||||
// }
|
||||
|
||||
// label notfound:
|
||||
SetJumpTarget(notfound);
|
||||
|
||||
//Ok, no block, let's jit
|
||||
// Save downcount reg value to memory
|
||||
SaveDowncount(DCNTREG);
|
||||
|
||||
// Exec JitAt => Compile block !
|
||||
QuickCallFunction((void *)&JitAt);
|
||||
|
||||
// Update downcount reg value from memory
|
||||
RestoreDowncount(DCNTREG);
|
||||
|
||||
// branch to dispatcherNoCheck:
|
||||
B(dispatcherNoCheck); // no point in special casing this
|
||||
// }
|
||||
|
||||
// label bail:
|
||||
SetJumpTarget(bail);
|
||||
|
||||
// label bailCoreState:
|
||||
SetJumpTarget(bailCoreState);
|
||||
|
||||
// Compare coreState and CORE_RUNNING
|
||||
MOVI2R(SREG, (u32)&coreState);
|
||||
LWZ(SREG, SREG); // SREG = *SREG => SREG = coreState
|
||||
CMPLI(SREG, 0); // compare 0(CORE_RUNNING) and corestate
|
||||
|
||||
BEQ(outerLoop);
|
||||
// }
|
||||
|
||||
// badCoreState label:
|
||||
SetJumpTarget(badCoreState);
|
||||
|
||||
// Keep current location
|
||||
breakpointBailout = GetCodePtr();
|
||||
|
||||
// mips->downcount = DCNTREG
|
||||
SaveDowncount(DCNTREG);
|
||||
|
||||
Epilogue();
|
||||
|
||||
// Go back to caller
|
||||
BLR();
|
||||
|
||||
// Don't forget to zap the instruction cache!
|
||||
FlushIcache();
|
||||
}
|
||||
|
||||
}
|
172
Core/MIPS/PPC/PpcCompAlu.cpp
Normal file
172
Core/MIPS/PPC/PpcCompAlu.cpp
Normal file
|
@ -0,0 +1,172 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
using namespace MIPSAnalyst;
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
#define _RT ((op>>16) & 0x1F)
|
||||
#define _RD ((op>>11) & 0x1F)
|
||||
#define _FS ((op>>11) & 0x1F)
|
||||
#define _FT ((op>>16) & 0x1F)
|
||||
#define _FD ((op>>6 ) & 0x1F)
|
||||
#define _SA ((op>>6 ) & 0x1F)
|
||||
#define _POS ((op>>6 ) & 0x1F)
|
||||
#define _SIZE ((op>>11 ) & 0x1F)
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// Bitwise OR evaluator, passed to CompType3 for folding known operands.
static u32 EvalOr(u32 lhs, u32 rhs)
{
	return lhs | rhs;
}
|
||||
// Bitwise exclusive-OR evaluator, passed to CompType3 for folding known operands.
static u32 EvalEor(u32 lhs, u32 rhs)
{
	return lhs ^ rhs;
}
|
||||
// Bitwise AND evaluator, passed to CompType3 for folding known operands.
static u32 EvalAnd(u32 lhs, u32 rhs)
{
	return lhs & rhs;
}
|
||||
// Wrapping 32-bit addition evaluator, passed to CompType3 for folding known operands.
static u32 EvalAdd(u32 lhs, u32 rhs)
{
	return lhs + rhs;
}
|
||||
// Wrapping 32-bit subtraction evaluator (lhs - rhs), passed to CompType3
// for folding known operands.
static u32 EvalSub(u32 lhs, u32 rhs)
{
	return lhs - rhs;
}
|
||||
|
||||
// Compiles MIPS immediate-type ALU instructions.
// Handled natively: addi/addiu (opcodes 8/9) and lui (opcode 15); every
// other opcode is forwarded to Comp_Generic.
void Jit::Comp_IType(u32 op)
{
	CONDITIONAL_DISABLE;
	s32 simm = (s32)(s16)(op & 0xFFFF);  // sign extension
	u32 uimm = op & 0xFFFF;

	int rt = _RT;
	int rs = _RS;

	// noop, won't write to ZERO.
	if (rt == 0)
		return;

	switch (op >> 26)
	{
	case 8:	// same as addiu?
	case 9:	// R(rt) = R(rs) + simm; //addiu
		if (gpr.IsImm(rs)) {
			// Source value is known at compile time: fold the addition.
			gpr.SetImm(rt, gpr.GetImm(rs) + simm);
		} else {
			gpr.MapDirtyIn(rt, rs);
			ADDI(gpr.R(rt), gpr.R(rs), simm);
		}
		break;

	case 15: // R(rt) = uimm << 16; //lui
		gpr.SetImm(rt, uimm << 16);
		break;

	default:
		Comp_Generic(op);
		break;
	}
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_RType2(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// Utilities to reduce duplicated code
|
||||
// Placeholder: not implemented on this backend. Ignores all of its
// arguments and traps via DebugBreak() if it is ever reached.
void Jit::CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b))
{
	DebugBreak();
}
|
||||
// Shared code generator for three-register ALU ops: rd = rs <op> rt.
//   arith - emitter member that produces the operation on host registers.
//   eval  - computes the same operation at compile time; used when both
//           sources are known immediates.
//   isSub - currently unused by this implementation.
// When exactly one source is a known immediate it is materialised in SREG.
void Jit::CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub) {
	// Both sources known: fold, no code emitted.
	if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
		gpr.SetImm(rd, (*eval)(gpr.GetImm(rs), gpr.GetImm(rt)));
		return;
	}

	// Only rt known. The immediate is read before mapping rd.
	if (gpr.IsImm(rt)) {
		const u32 knownRt = gpr.GetImm(rt);
		gpr.MapDirtyIn(rd, rs);

		MOVI2R(SREG, knownRt);
		(this->*arith)(gpr.R(rd), gpr.R(rs), SREG);
		return;
	}

	// Only rs known. The immediate is read before mapping rd.
	if (gpr.IsImm(rs)) {
		const u32 knownRs = gpr.GetImm(rs);
		gpr.MapDirtyIn(rd, rt);
		// TODO: special-case values that fit the instruction's immediate form
		MOVI2R(SREG, knownRs);
		(this->*arith)(gpr.R(rd), SREG, gpr.R(rt));
		return;
	}

	// General case: both sources live in registers.
	gpr.MapDirtyInIn(rd, rs, rt);
	(this->*arith)(gpr.R(rd), gpr.R(rs), gpr.R(rt));
}
|
||||
|
||||
// Compiles MIPS three-register ALU instructions, selected by the low six
// bits of the opcode. add/addu, sub/subu, and, or and xor are emitted
// natively; everything else goes through Comp_Generic.
void Jit::Comp_RType3(u32 op)
{
	CONDITIONAL_DISABLE;
	const int rt = _RT;
	const int rs = _RS;
	const int rd = _RD;

	// Writes to the zero register are dropped.
	if (rd == 0) {
		return;
	}

	const u32 funct = op & 63;
	switch (funct)
	{
	case 32: // add:  R(rd) = R(rs) + R(rt)
	case 33: // addu: R(rd) = R(rs) + R(rt)
		{
			// Adding a known zero is just a register copy from the other source.
			int copyFrom = -1;
			if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) {
				copyFrom = rt;
			} else if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {
				copyFrom = rs;
			}

			if (copyFrom >= 0) {
				gpr.MapDirtyIn(rd, copyFrom);
				MR(gpr.R(rd), gpr.R(copyFrom));
			} else {
				CompType3(rd, rs, rt, &PPCXEmitter::ADD, &EvalAdd);
			}
		}
		break;

	case 34: // sub:  R(rd) = R(rs) - R(rt)
	case 35: // subu: R(rd) = R(rs) - R(rt)
		CompType3(rd, rs, rt, &PPCXEmitter::SUB, &EvalSub, true);
		break;

	case 36: // and: R(rd) = R(rs) & R(rt)
		CompType3(rd, rs, rt, &PPCXEmitter::AND, &EvalAnd);
		break;

	case 37: // or: R(rd) = R(rs) | R(rt)
		CompType3(rd, rs, rt, &PPCXEmitter::OR, &EvalOr);
		break;

	case 38: // xor: R(rd) = R(rs) ^ R(rt)
		CompType3(rd, rs, rt, &PPCXEmitter::XOR, &EvalEor);
		break;

	default:
		Comp_Generic(op);
		break;
	}
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_ShiftType(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_Allegrex(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_Allegrex2(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_MulDivType(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_Special3(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
}
|
434
Core/MIPS/PPC/PpcCompBranch.cpp
Normal file
434
Core/MIPS/PPC/PpcCompBranch.cpp
Normal file
|
@ -0,0 +1,434 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/HLE/HLE.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
#include <ppcintrinsics.h>
|
||||
|
||||
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
#define _RT ((op>>16) & 0x1F)
|
||||
#define _RD ((op>>11) & 0x1F)
|
||||
#define _FS ((op>>11) & 0x1F)
|
||||
#define _FT ((op>>16) & 0x1F)
|
||||
#define _FD ((op>>6 ) & 0x1F)
|
||||
#define _POS ((op>>6 ) & 0x1F)
|
||||
#define _SIZE ((op>>11 ) & 0x1F)
|
||||
|
||||
#define LOOPOPTIMIZATION 0
|
||||
|
||||
// We can disable nice delay slots.
|
||||
#define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
|
||||
// #define CONDITIONAL_NICE_DELAYSLOT ;
|
||||
|
||||
#define SHOW_JS_COMPILER_PC { printf("js.compilerPC: %08x\n", js.compilerPC); }
|
||||
|
||||
#define BRANCH_COMPILE_LOG { printf("JIT(%8x): %s => %d - %08x\n", (u32)GetCodePtr() ,__FUNCTION__, cc, js.compilerPC); }
|
||||
|
||||
using namespace MIPSAnalyst;
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// Compiles a conditional branch that compares R(rs) with R(rt).
//   cc     - host condition used to jump OVER the taken exit, i.e. the
//            inverse of the MIPS branch condition.
//   likely - "likely" variant: the delay slot is compiled only on the
//            taken path.
// Writes both block exits (taken = exit 0, not taken = exit 1) and ends
// compilation of the current block.
void Jit::BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}

	const int rs = _RS;
	const int rt = _RT;
	// The 16-bit displacement is in instructions, relative to the delay slot.
	const int offset = (signed short)(op&0xFFFF)<<2;
	const u32 targetAddr = js.compilerPC + offset + 4;

	const u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);
	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
	CONDITIONAL_NICE_DELAYSLOT;

	// A "nice" delay slot can be compiled ahead of the comparison.
	if (delaySlotIsNice && !likely) {
		CompileDelaySlot(DELAYSLOT_NICE);
	}

	// Emit the comparison, using the short form when one side is a known zero.
	const bool rtIsZero = gpr.IsImm(rt) && gpr.GetImm(rt) == 0;
	if (rtIsZero) {
		gpr.MapReg(rs);
		CMPLI(gpr.R(rs), 0);
	} else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) {
		// only these are easily 'flippable'
		gpr.MapReg(rt);
		CMPLI(gpr.R(rt), 0);
	} else {
		gpr.MapInIn(rs, rt);
		CMPL(gpr.R(rs), gpr.R(rt));
	}

	PpcGen::FixupBranch ptr;
	if (likely) {
		// Delay slot belongs to the taken path only.
		FlushAll();
		ptr = B_Cond(cc);
		CompileDelaySlot(DELAYSLOT_FLUSH);
	} else {
		if (delaySlotIsNice) {
			FlushAll();
		} else {
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		}
		ptr = B_Cond(cc);
	}

	INFO_LOG(CPU, "targetAddr: %08x,js.compilerPC: %08x offset: %08x, op: %08x\n", targetAddr, js.compilerPC, offset, op);

	// Branch taken.
	WriteExit(targetAddr, 0);

	SetJumpTarget(ptr);

	// Branch not taken: continue after the delay slot.
	WriteExit(js.compilerPC+8, 1);

	js.compiling = false;
}
|
||||
|
||||
|
||||
// Compiles a conditional branch that compares R(rs) against zero.
//   cc      - host condition used to jump OVER the taken exit, i.e. the
//             inverse of the MIPS branch condition.
//   andLink - also store the return address (compilerPC + 8) into RA.
//   likely  - "likely" variant: the delay slot is compiled only on the
//             taken path.
// Writes both block exits (taken = exit 0, not taken = exit 1) and ends
// compilation of the current block.
void Jit::BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}

	const int rs = _RS;
	// The 16-bit displacement is in instructions, relative to the delay slot.
	const int offset = (signed short)(op&0xFFFF)<<2;
	const u32 targetAddr = js.compilerPC + offset + 4;

	const u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
	CONDITIONAL_NICE_DELAYSLOT;

	// A "nice" delay slot can be compiled ahead of the comparison.
	if (delaySlotIsNice && !likely) {
		CompileDelaySlot(DELAYSLOT_NICE);
	}

	gpr.MapReg(rs);
	CMPI(gpr.R(rs), 0);

	PpcGen::FixupBranch ptr;
	if (likely) {
		// Delay slot belongs to the taken path only.
		FlushAll();
		ptr = B_Cond(cc);
		CompileDelaySlot(DELAYSLOT_FLUSH);
	} else {
		if (delaySlotIsNice) {
			FlushAll();
		} else {
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		}
		ptr = B_Cond(cc);
	}

	// Branch taken.
	// NOTE(review): the link store sits after B_Cond, so RA is only written
	// when the branch is taken. MIPS bltzal/bgezal link regardless of the
	// outcome - TODO confirm whether this is intended.
	if (andLink) {
		//Break();
		MOVI2R(SREG, js.compilerPC + 8);
		STW(SREG, CTXREG, MIPS_REG_RA * 4);
	}

	WriteExit(targetAddr, 0);

	SetJumpTarget(ptr);

	// Branch not taken: continue after the delay slot.
	WriteExit(js.compilerPC + 8, 1);

	js.compiling = false;
}
|
||||
|
||||
// Dispatches the beq/bne/blez/bgtz family (opcodes 4-7) and their "likely"
// forms (opcodes 20-23, i.e. the same opcode with bit 4 set).
// The condition codes passed down are the opposite of the MIPS condition
// because the emitted conditional branch skips over the taken exit.
void Jit::Comp_RelBranch(u32 op)
{
	const u32 opcode = op >> 26;
	const bool likely = (opcode & 0x10) != 0;

	switch (opcode & ~0x10u)
	{
	case 4: // beq / beql
		BranchRSRTComp(op, _BNE, likely);
		break;
	case 5: // bne / bnel
		BranchRSRTComp(op, _BEQ, likely);
		break;
	case 6: // blez / blezl
		BranchRSZeroComp(op, _BGT, false, likely);
		break;
	case 7: // bgtz / bgtzl
		BranchRSZeroComp(op, _BLE, false, likely);
		break;

	default:
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
		break;
	}
	js.compiling = false;
}
|
||||
|
||||
// Dispatches the REGIMM branches, selected by bits 16..20 of the opcode:
//    0 bltz     1 bgez     2 bltzl     3 bgezl
//   16 bltzal  17 bgezal  18 bltzall  19 bgezall
// Bit 0 picks the comparison (0: branch if (s32)R(rs) < 0, 1: if >= 0),
// bit 1 marks the "likely" form and bit 4 the "and link" form
// (R(MIPS_REG_RA) = PC + 8). The condition codes passed down are inverted
// because the emitted conditional branch skips over the taken exit.
void Jit::Comp_RelBranchRI(u32 op)
{
	const u32 sel = (op >> 16) & 0x1F;

	// Only bits 0, 1 and 4 may be set for a valid encoding.
	if ((sel & ~0x13u) != 0) {
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
	} else {
		const bool branchIfNonNegative = (sel & 1) != 0;
		const bool likely = (sel & 2) != 0;
		const bool andLink = (sel & 16) != 0;

		if (branchIfNonNegative) {
			BranchRSZeroComp(op, _BLT, andLink, likely);
		} else {
			BranchRSZeroComp(op, _BGE, andLink, likely);
		}
	}
	js.compiling = false;
}
|
||||
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
void Jit::BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
|
||||
{
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op & 0xFFFF) << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
FlushAll();
|
||||
|
||||
LWZ(SREG, CTXREG, offsetof(MIPSState, fpcond));
|
||||
// change CR0
|
||||
ANDI(SREG, SREG, 1);
|
||||
|
||||
PpcGen::FixupBranch ptr;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
ptr = B_Cond(cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = B_Cond(cc);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, 0);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
// Dispatches the FPU condition branches, selected by bits 16..20:
//   0 bc1f, 1 bc1t, 2 bc1fl, 3 bc1tl.
// Bit 0 picks true/false, bit 1 the "likely" form.
void Jit::Comp_FPUBranch(u32 op)
{
	const u32 sel = (op >> 16) & 0x1f;

	if (sel > 3) {
		_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
	} else {
		const bool likely = (sel & 2) != 0;
		if ((sel & 1) != 0) {
			BranchFPFlag(op, _BEQ, likely);  // bc1t / bc1tl
		} else {
			BranchFPFlag(op, _BNE, likely);  // bc1f / bc1fl
		}
	}
	js.compiling = false;
}
|
||||
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
void Jit::BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
|
||||
{
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = (signed short)(op & 0xFFFF) << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
|
||||
|
||||
bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
FlushAll();
|
||||
|
||||
int imm3 = (op >> 18) & 7;
|
||||
|
||||
|
||||
MOVI2R(SREG, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
|
||||
LWZ(SREG, SREG, 0);
|
||||
// change CR0
|
||||
ANDI(SREG, SREG, 1 << imm3);
|
||||
|
||||
PpcGen::FixupBranch ptr;
|
||||
js.inDelaySlot = true;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
ptr = B_Cond(cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = B_Cond(cc);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
js.inDelaySlot = false;
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, 0);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, 1);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
// Dispatches the VFPU condition branches, selected by bits 16..17:
//   0 bvf, 1 bvt, 2 bvfl, 3 bvtl.
// Bit 0 picks true/false, bit 1 the "likely" form.
void Jit::Comp_VBranch(u32 op)
{
	const u32 sel = (op >> 16) & 3;
	const bool likely = (sel & 2) != 0;

	if ((sel & 1) != 0) {
		BranchVFPUFlag(op, _BEQ, likely);  // bvt / bvtl
	} else {
		BranchVFPUFlag(op, _BNE, likely);  // bvf / bvfl
	}
	js.compiling = false;
}
|
||||
|
||||
// Compiles the absolute jumps j (opcode 2) and jal (opcode 3).
// The target keeps the top four bits of the current PC and takes the
// remaining bits from the 26-bit instruction index shifted left by two.
// jal additionally sets RA to compilerPC + 8 before the delay slot.
void Jit::Comp_Jump(u32 op)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}

	const u32 targetAddr = (js.compilerPC & 0xF0000000) | ((op & 0x03FFFFFF) << 2);
	const u32 opcode = op >> 26;
	const bool isJump = (opcode == 2);
	const bool isJumpAndLink = (opcode == 3);

	if (isJump || isJumpAndLink) {
		if (isJumpAndLink) {
			//Break();
			gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
			MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
		}
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		WriteExit(targetAddr, 0);
	} else {
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
	}
	js.compiling = false;
}
|
||||
|
||||
// Compiles the register jumps jr (funct 8) and jalr (funct 9).
// The destination is copied into FLAGREG so that it survives the delay
// slot, then the block exits through WriteExitDestInR(FLAGREG).
// A syscall in the delay slot is special-cased: the destination is stored
// to the PC first and the syscall's own exit code is used.
void Jit::Comp_JumpReg(u32 op)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}

	const int rs = _RS;

	const u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
	CONDITIONAL_NICE_DELAYSLOT;

	if (IsSyscall(delaySlotOp)) {
		gpr.MapReg(rs);
		const PPCReg dest = gpr.R(rs);
		MR(FLAGREG, dest);
		MovToPC(FLAGREG);  // For syscall to be able to return.
		CompileDelaySlot(DELAYSLOT_FLUSH);
		return;  // Syscall wrote exit code.
	}

	if (delaySlotIsNice) {
		// Delay slot first, then capture the destination.
		CompileDelaySlot(DELAYSLOT_NICE);
		gpr.MapReg(rs);
		const PPCReg dest = gpr.R(rs);
		MR(FLAGREG, dest);  // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
		FlushAll();
	} else {
		// Capture the destination before compiling the delay slot.
		gpr.MapReg(rs);
		const PPCReg dest = gpr.R(rs);
		MR(FLAGREG, dest);  // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
	}

	const u32 funct = op & 0x3f;
	if (funct == 9) {
		// jalr: store compilerPC + 8 into the RA slot of the context.
		//Break();
		MOVI2R(SREG, js.compilerPC + 8);
		STW(SREG, CTXREG, MIPS_REG_RA * 4);
	} else if (funct != 8) {
		// Neither jr nor jalr.
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
	}

	WriteExitDestInR(FLAGREG);
	js.compiling = false;
}
|
||||
|
||||
// Compiles a syscall: flushes the register cache, accounts for the cycles
// consumed so far, emits a call to CallSyscall with op in R3 and ends the
// block through the syscall exit.
void Jit::Comp_Syscall(u32 op)
{
	FlushAll();

	// If we're in a delay slot, this is off by one.
	int adjust = 0;
	if (js.inDelaySlot) {
		adjust = -1;
	}
	WriteDownCount(adjust);
	js.downcountAmount = -adjust;

	// CallSyscall(op), keeping the downcount in memory across the call.
	MOVI2R(R3, op);
	SaveDowncount(DCNTREG);
	QuickCallFunction((void *)&CallSyscall);
	RestoreDowncount(DCNTREG);

	WriteSyscallExit();
	js.compiling = false;
}
|
||||
|
||||
// Compiles a break: handled via Comp_Generic, after which the block is
// ended through the syscall exit.
void Jit::Comp_Break(u32 op)
{
	Comp_Generic(op);
	WriteSyscallExit();
	js.compiling = false;
}
|
||||
|
||||
|
||||
}
|
42
Core/MIPS/PPC/PpcCompFpu.cpp
Normal file
42
Core/MIPS/PPC/PpcCompFpu.cpp
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
#include <ppcintrinsics.h>
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
extern volatile CoreState coreState;
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_FPULS(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_FPUComp(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_FPU3op(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_FPU2op(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
// No native code path yet: always defers to Comp_Generic.
void Jit::Comp_mxc1(u32 op)
{
	Comp_Generic(op);
}
|
||||
|
||||
}
|
142
Core/MIPS/PPC/PpcCompLoadStore.cpp
Normal file
142
Core/MIPS/PPC/PpcCompLoadStore.cpp
Normal file
|
@ -0,0 +1,142 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
#define _RT ((op>>16) & 0x1F)
|
||||
#define _RD ((op>>11) & 0x1F)
|
||||
#define _FS ((op>>11) & 0x1F)
|
||||
#define _FT ((op>>16) & 0x1F)
|
||||
#define _FD ((op>>6 ) & 0x1F)
|
||||
#define _POS ((op>>6 ) & 0x1F)
|
||||
#define _SIZE ((op>>11 ) & 0x1F)
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// Emits code that computes the masked effective address of a load/store
// into host register r:  r = (R(rs) + offset) & 0x3FFFFFFF.
//   r      - destination host register (previously ignored: the result
//            always went to SREG no matter what the caller asked for).
//   rs     - MIPS base register; must already be mapped by the caller.
//   offset - signed 16-bit displacement.
void Jit::SetRegToEffectiveAddress(PpcGen::PPCReg r, int rs, s16 offset) {
	if (offset) {
		ADDI(r, gpr.R(rs), offset);
		RLWINM(r, r, 0, 2, 31); // &= 0x3FFFFFFF
	} else {
		// No displacement: mask the base register straight into r.
		RLWINM(r, gpr.R(rs), 0, 2, 31); // &= 0x3FFFFFFF
	}
}
|
||||
// Compiles the integer load/store I-type instructions.
//
// lb/lh/lw/lbu/lhu and sb/sh/sw get native code: the masked effective address
// is placed in SREG and the access is done indexed off BASEREG. The 16- and
// 32-bit accesses use the byte-reversed load/store forms.
// lwl/lwr/swl/swr: when the next instruction is the matching opposite half,
// the pair is folded into a single lw/sw; otherwise the op goes through
// Comp_Generic.
// Any other opcode goes through Comp_Generic.
void Jit::Comp_ITypeMem(u32 op) {
	CONDITIONAL_DISABLE;
	int offset = (signed short)(op&0xFFFF);
	bool load = false;
	int rt = _RT;
	int rs = _RS;
	int o = op>>26;
	if (((op >> 29) & 1) == 0 && rt == 0) {
		// Don't load anything into $zr
		return;
	}

	// Address known at compile time when rs holds an immediate.
	u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;

	switch (o)
	{
	case 32: //lb
	case 33: //lh
	case 35: //lw
	case 36: //lbu
	case 37: //lhu
		load = true;
		// fall through: loads and stores share the address set-up below.
	case 40: //sb
	case 41: //sh
	case 43: //sw

		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
			// We can compute the full address at compile time. Kickass.
			u32 addr = iaddr & 0x3FFFFFFF;
			// Must be OK even if rs == rt since we have the value from imm already.
			gpr.MapReg(rt, load ? MAP_NOINIT | MAP_DIRTY : 0);
			MOVI2R(SREG, addr);
		} else {
			_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
			if (load) {
				gpr.MapDirtyIn(rt, rs);
			} else {
				gpr.MapInIn(rt, rs);
			}

			SetRegToEffectiveAddress(SREG, rs, offset);
		}
		switch (o)
		{
		// Load
		case 32: //lb
			LBZX(gpr.R(rt), BASEREG, SREG);
			EXTSB(gpr.R(rt), gpr.R(rt)); // sign-extend the byte
			break;
		case 33: //lh
			LHBRX(gpr.R(rt), BASEREG, SREG);
			EXTSH(gpr.R(rt), gpr.R(rt)); // sign-extend the halfword
			break;
		case 35: //lw
			LWBRX(gpr.R(rt), BASEREG, SREG);
			break;
		case 36: //lbu
			LBZX (gpr.R(rt), BASEREG, SREG);
			break;
		case 37: //lhu
			LHBRX (gpr.R(rt), BASEREG, SREG);
			break;
		// Store
		case 40: //sb
			STBX (gpr.R(rt), BASEREG, SREG);
			break;
		case 41: //sh
			STHBRX(gpr.R(rt), BASEREG, SREG);
			break;
		case 43: //sw
			STWBRX(gpr.R(rt), BASEREG, SREG);
			break;
		}
		break;
	case 34: //lwl
	case 38: //lwr
		load = true;
		// fall through: the pairing logic is shared with swl/swr.
	case 42: //swl
	case 46: //swr
		if (!js.inDelaySlot) {
			// Optimisation: Combine to single unaligned load/store
			bool isLeft = (o == 34 || o == 42);
			u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
			// Find a matching shift in opposite direction with opposite offset.
			if (nextOp == (isLeft ? (op + (4<<26) - 3)
			                      : (op - (4<<26) + 3)))
			{
				EatInstruction(nextOp);
				nextOp = ((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x3FFFFFF); //lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}

		DISABLE; // Disabled until crashes are resolved.
		break;   // not reached: DISABLE returns.
	default:
		Comp_Generic(op);
		return ;
	}
}
|
||||
}
|
139
Core/MIPS/PPC/PpcCompVFPU.cpp
Normal file
139
Core/MIPS/PPC/PpcCompVFPU.cpp
Normal file
|
@ -0,0 +1,139 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
#include <ppcintrinsics.h>
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
void Jit::Comp_SV(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_SVQ(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VPFX(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VVectorInit(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VMatrixInit(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VDot(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VecDo3(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VV2Op(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Mftv(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vmtvc(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vmmov(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VScl(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vmmul(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vmscl(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vtfm(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VHdp(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VCrs(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VDet(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vi2x(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vx2i(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vf2i(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vi2f(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vcst(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vhoriz(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VRot(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_VIdt(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vcmp(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vcmov(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Viim(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
|
||||
void Jit::Comp_Vfim(u32 op) {
|
||||
Comp_Generic(op);
|
||||
}
|
||||
}
|
235
Core/MIPS/PPC/PpcJit.cpp
Normal file
235
Core/MIPS/PPC/PpcJit.cpp
Normal file
|
@ -0,0 +1,235 @@
|
|||
#include "Common/ChunkFile.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "PpcRegCache.h"
|
||||
#include "ppcEmitter.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
#include <ppcintrinsics.h>
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
extern volatile CoreState coreState;
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
static u32 delaySlotFlagsValue;
|
||||
|
||||
void Jit::CompileDelaySlot(int flags)
|
||||
{
|
||||
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
|
||||
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
|
||||
// delay slot, we're screwed.
|
||||
if (flags & DELAYSLOT_SAFE) {
|
||||
// Save flags register
|
||||
MOVI2R(SREG, (u32)&delaySlotFlagsValue);
|
||||
STW(FLAGREG, SREG);
|
||||
}
|
||||
|
||||
js.inDelaySlot = true;
|
||||
u32 op = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
MIPSCompileOp(op);
|
||||
js.inDelaySlot = false;
|
||||
|
||||
if (flags & DELAYSLOT_FLUSH)
|
||||
FlushAll();
|
||||
|
||||
if (flags & DELAYSLOT_SAFE) {
|
||||
// Restore flags register
|
||||
MOVI2R(SREG, (u32)&delaySlotFlagsValue);
|
||||
LWZ(FLAGREG, SREG);
|
||||
}
|
||||
}
|
||||
|
||||
// Compiles one block starting at em_address.
//
// The cache is cleared first when less than 0x10000 bytes of code space
// remain or the block cache is full. If the block ended while a VFPU prefix
// may still be pending and the default-prefix assumption was active, the
// assumption is dropped, the cache is cleared and the block is compiled once
// more (the flag is toggled first, so this recurses at most once).
void Jit::Compile(u32 em_address)
{
	const bool needsReset = GetSpaceLeft() < 0x10000 || blocks.IsFull();
	if (needsReset)
		ClearCache();

	const int blockIndex = blocks.AllocateBlock(em_address);
	JitBlock *block = blocks.GetBlock(blockIndex);
	DoJit(em_address, block);
	blocks.FinalizeBlock(blockIndex, jo.enableBlocklink);

	// Nothing more to do unless the VFPU hit an uneaten prefix at block end.
	if (!js.startDefaultPrefix || !js.MayHavePrefix())
		return;

	// Our assumptions are all wrong so it's clean-slate time.
	js.startDefaultPrefix = false;
	ClearCache();

	// Second attempt; startDefaultPrefix is now false so we cannot loop.
	Compile(em_address);
}
|
||||
|
||||
// Emits a load of MIPSState::pc (addressed off CTXREG) into r.
void Jit::MovFromPC(PPCReg r)
{
	LWZ(r, CTXREG, offsetof(MIPSState, pc));
}
|
||||
|
||||
// Emits a store of r into MIPSState::pc (addressed off CTXREG).
void Jit::MovToPC(PPCReg r)
{
	STW(r, CTXREG, offsetof(MIPSState, pc));
}
|
||||
|
||||
// Emits a store of r into MIPSState::downcount (addressed off CTXREG).
void Jit::SaveDowncount(PPCReg r)
{
	STW(r, CTXREG, offsetof(MIPSState, downcount));
}
|
||||
|
||||
// Emits a load of MIPSState::downcount (addressed off CTXREG) into r.
void Jit::RestoreDowncount(PPCReg r)
{
	LWZ(r, CTXREG, offsetof(MIPSState, downcount));
}
|
||||
|
||||
static void ShowDownCount() {
|
||||
if (currentMIPS->downcount<0) {
|
||||
//ERROR_LOG(DYNA_REC, "MIPSState, downcount %08x", currentMIPS->downcount);
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
|
||||
void Jit::WriteDownCount(int offset)
|
||||
{
|
||||
// don't know if the result is correct
|
||||
int theDowncount = js.downcountAmount + offset;
|
||||
if (jo.downcountInRegister) {
|
||||
// DCNTREG = DCNTREG - theDowncount;
|
||||
MOVI2R(SREG, theDowncount);
|
||||
SUBF(DCNTREG, SREG, DCNTREG, 1);
|
||||
STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
|
||||
} else {
|
||||
// DCNTREG = MIPSState->downcount - theDowncount;
|
||||
MOVI2R(SREG, theDowncount);
|
||||
LWZ(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
|
||||
SUBF(DCNTREG, SREG, DCNTREG, 1);
|
||||
STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
|
||||
}
|
||||
|
||||
//QuickCallFunction(ShowDownCount);
|
||||
|
||||
CMPI(DCNTREG, 0);
|
||||
}
|
||||
|
||||
// Fallback path: emits a call to the interpreter routine for `op`.
//
// All cached registers are flushed first. If the interpreter has a handler
// for the opcode, the downcount register is saved, the opcode is loaded into
// R3, the handler is called and the downcount is restored. For VFPU
// instructions the compile-time prefix state is restarted afterwards,
// because the interpreted op might have eaten prefixes.
void Jit::Comp_Generic(u32 op)
{
	FlushAll();

	const MIPSInterpretFunc interp = MIPSGetInterpretFunc(op);
	if (interp) {
		// Keep the cycle counter safe across the call.
		SaveDowncount(DCNTREG);

		// Opcode goes in R3 for the interpreter routine.
		MOVI2R(R3, op);
		QuickCallFunction((void *)interp);

		RestoreDowncount(DCNTREG);
	}

	// Might have eaten prefixes, hard to tell...
	const bool isVfpuOp = (MIPSGetInfo(op) & IS_VFPU) != 0;
	if (isVfpuOp)
		js.PrefixStart();
}
|
||||
|
||||
// Skips over `op` without compiling it: advances the compiler PC by one
// instruction and adds the op's cycle estimate to the pending downcount.
// Debug builds assert that `op` is not a branch (DELAYSLOT info bit) and
// that we are not currently inside a delay slot.
void Jit::EatInstruction(u32 op)
{
	const u32 opInfo = MIPSGetInfo(op);
	_dbg_assert_msg_(JIT, !(opInfo & DELAYSLOT), "Never eat a branch op.");
	_dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot.");

	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
|
||||
|
||||
// Not expected to run: the dispatcher should intercept before this handler
// is reached. Only logs an error.
void Jit::Comp_RunBlock(u32 op)
{
	ERROR_LOG(DYNA_REC, "Comp_RunBlock should never be reached!");
}
|
||||
|
||||
// Deliberately emits nothing for `op`.
void Jit::Comp_DoNothing(u32 op)
{
}
|
||||
|
||||
void Jit::FlushAll()
|
||||
{
|
||||
gpr.FlushAll();
|
||||
//fpr.FlushAll();
|
||||
//FlushPrefixV();
|
||||
}
|
||||
|
||||
// Drops every compiled block, resets the code space and regenerates the
// fixed code, in that order.
void Jit::ClearCache()
{
	blocks.Clear();
	ClearCodeSpace();
	GenerateFixedCode();
}
|
||||
|
||||
// Address-specific invalidation is not implemented: em_address is ignored
// and the whole cache is cleared.
void Jit::ClearCacheAt(u32 em_address)
{
	ClearCache();
}
|
||||
|
||||
// Builds the JIT: initialises the block cache, hooks the register cache up
// to this emitter, allocates 16 MB of code space and generates the fixed
// code. Compilation starts out assuming default VFPU prefixes.
Jit::Jit(MIPSState *mips)
	: blocks(mips, this)
	, gpr(mips, &jo)
	, mips_(mips)
{
	blocks.Init();
	gpr.SetEmitter(this);

	// NOTE(review): the original comment justified the size with the reach of
	// an ARM branch instruction (32MB max); that reasoning was written for the
	// ARM backend - confirm the limit that applies on PPC.
	AllocCodeSpace(1024 * 1024 * 16);
	GenerateFixedCode();

	js.startDefaultPrefix = true;
}
|
||||
|
||||
// Enters the generated code through enterCode. globalticks is not used here.
void Jit::RunLoopUntil(u64 globalticks)
{
#ifdef _XBOX
	// force stack alignment
	_alloca(8*1024);
#endif

	// Run the compiled code
	typedef void (*EnterCodeFunc)();
	EnterCodeFunc enter = (EnterCodeFunc)enterCode;
	enter();
}
|
||||
|
||||
|
||||
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditionals that set PC "twice". This only works when we fall back to the dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
|
||||
// Emits a block exit to a known destination.
//
// Writes the downcount update, records the exit (address and code pointer)
// in the current block under slot exit_num, then either branches directly to
// the destination block's checked entry (when that block already exists and
// block linking is enabled) or loads the destination into SREG and branches
// to dispatcherPCInR0.
void Jit::WriteExit(u32 destination, int exit_num)
{
	WriteDownCount();

	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *current = js.curBlock;
	current->exitAddress[exit_num] = destination;
	current->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity?
	const int targetBlock = blocks.GetBlockNumberFromStartAddress(destination);
	const bool linkNow = targetBlock >= 0 && jo.enableBlocklink;
	if (!linkNow) {
		MOVI2R(SREG, destination);
		B((const void *)dispatcherPCInR0);
		return;
	}

	// The destination is already compiled: jump straight to it.
	B(blocks.GetBlock(targetBlock)->checkedEntry);
	current->linkStatus[exit_num] = true;
}
|
||||
|
||||
// Emits a block exit whose destination is held in Reg: the register is
// stored to the MIPS PC, the downcount is updated and control goes to the
// dispatcher.
void Jit::WriteExitDestInR(PPCReg Reg)
{
	//Break();
	MovToPC(Reg);
	WriteDownCount();

	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}
|
||||
|
||||
void Jit::WriteSyscallExit()
|
||||
{
|
||||
WriteDownCount();
|
||||
B((const void *)dispatcherCheckCoreState);
|
||||
}
|
||||
}
|
284
Core/MIPS/PPC/PpcJit.h
Normal file
284
Core/MIPS/PPC/PpcJit.h
Normal file
|
@ -0,0 +1,284 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../../Globals.h"
|
||||
|
||||
#include "Core/MIPS/JitCommon/JitBlockCache.h"
|
||||
#include "Core/MIPS/PPC/PpcRegCache.h"
|
||||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include <ppcEmitter.h>
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// Tunables for the PPC JIT. Both options default to enabled.
struct PpcJitOptions
{
	PpcJitOptions()
		: enableBlocklink(true)
		, downcountInRegister(true)
	{
	}

	// Allow exits to branch directly into already-compiled blocks.
	bool enableBlocklink;
	// Treat DCNTREG as already holding the downcount instead of reloading it.
	bool downcountInRegister;
};
|
||||
|
||||
struct PpcJitState
|
||||
{
|
||||
enum PrefixState
|
||||
{
|
||||
PREFIX_UNKNOWN = 0x00,
|
||||
PREFIX_KNOWN = 0x01,
|
||||
PREFIX_DIRTY = 0x10,
|
||||
PREFIX_KNOWN_DIRTY = 0x11,
|
||||
};
|
||||
|
||||
u32 compilerPC;
|
||||
u32 blockStart;
|
||||
bool cancel;
|
||||
bool inDelaySlot;
|
||||
int downcountAmount;
|
||||
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
|
||||
JitBlock *curBlock;
|
||||
|
||||
// VFPU prefix magic
|
||||
bool startDefaultPrefix;
|
||||
u32 prefixS;
|
||||
u32 prefixT;
|
||||
u32 prefixD;
|
||||
PrefixState prefixSFlag;
|
||||
PrefixState prefixTFlag;
|
||||
PrefixState prefixDFlag;
|
||||
void PrefixStart() {
|
||||
if (startDefaultPrefix) {
|
||||
EatPrefix();
|
||||
} else {
|
||||
PrefixUnknown();
|
||||
}
|
||||
}
|
||||
void PrefixUnknown() {
|
||||
prefixSFlag = PREFIX_UNKNOWN;
|
||||
prefixTFlag = PREFIX_UNKNOWN;
|
||||
prefixDFlag = PREFIX_UNKNOWN;
|
||||
}
|
||||
bool MayHavePrefix() const {
|
||||
if (HasUnknownPrefix()) {
|
||||
return true;
|
||||
} else if (prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0) {
|
||||
return true;
|
||||
} else if (VfpuWriteMask() != 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
bool HasUnknownPrefix() const {
|
||||
if (!(prefixSFlag & PREFIX_KNOWN) || !(prefixTFlag & PREFIX_KNOWN) || !(prefixDFlag & PREFIX_KNOWN)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool HasNoPrefix() const {
|
||||
return (prefixDFlag & PREFIX_KNOWN) && (prefixSFlag & PREFIX_KNOWN) && (prefixTFlag & PREFIX_KNOWN) && (prefixS == 0xE4 && prefixT == 0xE4 && prefixD == 0);
|
||||
}
|
||||
|
||||
void EatPrefix() {
|
||||
if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) {
|
||||
prefixSFlag = PREFIX_KNOWN_DIRTY;
|
||||
prefixS = 0xE4;
|
||||
}
|
||||
if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) {
|
||||
prefixTFlag = PREFIX_KNOWN_DIRTY;
|
||||
prefixT = 0xE4;
|
||||
}
|
||||
if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || VfpuWriteMask() != 0) {
|
||||
prefixDFlag = PREFIX_KNOWN_DIRTY;
|
||||
prefixD = 0x0;
|
||||
}
|
||||
}
|
||||
u8 VfpuWriteMask() const {
|
||||
_assert_(prefixDFlag & PREFIX_KNOWN);
|
||||
return (prefixD >> 8) & 0xF;
|
||||
}
|
||||
bool VfpuWriteMask(int i) const {
|
||||
_assert_(prefixDFlag & PREFIX_KNOWN);
|
||||
return (prefixD >> (8 + i)) & 1;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Bit flags accepted by Jit::CompileDelaySlot.
enum CompileDelaySlotFlags
{
	DELAYSLOT_NICE = 0,        // Easy, nothing extra.
	DELAYSLOT_FLUSH = 1 << 0,  // Flush registers after delay slot.
	DELAYSLOT_SAFE = 1 << 1,   // Preserve flags.
	// Flush registers after and preserve flags.
	DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE,
};
|
||||
|
||||
class Jit: public PpcGen::PPCXCodeBlock
|
||||
{
|
||||
protected:
|
||||
JitBlockCache blocks;
|
||||
public:
|
||||
Jit(MIPSState *mips);
|
||||
|
||||
// Compiled ops should ignore delay slots
|
||||
// the compiler will take care of them by itself
|
||||
// OR NOT
|
||||
void Comp_Generic(u32 op);
|
||||
|
||||
void EatInstruction(u32 op);
|
||||
void Comp_RunBlock(u32 op);
|
||||
|
||||
// TODO: Eat VFPU prefixes here.
|
||||
void EatPrefix() { }
|
||||
|
||||
// Ops
|
||||
void Comp_ITypeMem(u32 op);
|
||||
|
||||
void Comp_RelBranch(u32 op);
|
||||
void Comp_RelBranchRI(u32 op);
|
||||
void Comp_FPUBranch(u32 op);
|
||||
void Comp_FPULS(u32 op);
|
||||
void Comp_FPUComp(u32 op);
|
||||
void Comp_Jump(u32 op);
|
||||
void Comp_JumpReg(u32 op);
|
||||
void Comp_Syscall(u32 op);
|
||||
void Comp_Break(u32 op);
|
||||
|
||||
void Comp_IType(u32 op);
|
||||
void Comp_RType2(u32 op);
|
||||
void Comp_RType3(u32 op);
|
||||
void Comp_ShiftType(u32 op);
|
||||
void Comp_Allegrex(u32 op);
|
||||
void Comp_Allegrex2(u32 op);
|
||||
void Comp_VBranch(u32 op);
|
||||
void Comp_MulDivType(u32 op);
|
||||
void Comp_Special3(u32 op);
|
||||
|
||||
void Comp_FPU3op(u32 op);
|
||||
void Comp_FPU2op(u32 op);
|
||||
void Comp_mxc1(u32 op);
|
||||
|
||||
void Comp_DoNothing(u32 op);
|
||||
|
||||
void Comp_SV(u32 op);
|
||||
void Comp_SVQ(u32 op);
|
||||
void Comp_VPFX(u32 op);
|
||||
void Comp_VVectorInit(u32 op);
|
||||
void Comp_VMatrixInit(u32 op);
|
||||
void Comp_VDot(u32 op);
|
||||
void Comp_VecDo3(u32 op);
|
||||
void Comp_VV2Op(u32 op);
|
||||
void Comp_Mftv(u32 op);
|
||||
void Comp_Vmtvc(u32 op);
|
||||
void Comp_Vmmov(u32 op);
|
||||
void Comp_VScl(u32 op);
|
||||
void Comp_Vmmul(u32 op);
|
||||
void Comp_Vmscl(u32 op);
|
||||
void Comp_Vtfm(u32 op);
|
||||
void Comp_VHdp(u32 op);
|
||||
void Comp_VCrs(u32 op);
|
||||
void Comp_VDet(u32 op);
|
||||
void Comp_Vi2x(u32 op);
|
||||
void Comp_Vx2i(u32 op);
|
||||
void Comp_Vf2i(u32 op);
|
||||
void Comp_Vi2f(u32 op);
|
||||
void Comp_Vcst(u32 op);
|
||||
void Comp_Vhoriz(u32 op);
|
||||
void Comp_VRot(u32 op);
|
||||
void Comp_VIdt(u32 op);
|
||||
void Comp_Vcmp(u32 op);
|
||||
void Comp_Vcmov(u32 op);
|
||||
void Comp_Viim(u32 op);
|
||||
void Comp_Vfim(u32 op);
|
||||
|
||||
|
||||
// Utility compilation functions
|
||||
void BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
|
||||
void BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
|
||||
void BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely);
|
||||
void BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely);
|
||||
|
||||
void SetRegToEffectiveAddress(PpcGen::PPCReg r, int rs, s16 offset);
|
||||
|
||||
// Utilities to reduce duplicated code
|
||||
void CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b));
|
||||
void CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arithOp2)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub = false);
|
||||
|
||||
// flush regs
|
||||
void FlushAll();
|
||||
|
||||
void WriteDownCount(int offset = 0);
|
||||
void MovFromPC(PpcGen::PPCReg r);
|
||||
void MovToPC(PpcGen::PPCReg r);
|
||||
|
||||
void SaveDowncount(PpcGen::PPCReg r);
|
||||
void RestoreDowncount(PpcGen::PPCReg r);
|
||||
|
||||
void WriteExit(u32 destination, int exit_num);
|
||||
void WriteExitDestInR(PPCReg Reg);
|
||||
void WriteSyscallExit();
|
||||
|
||||
void ClearCache();
|
||||
void ClearCacheAt(u32 em_address);
|
||||
|
||||
void RunLoopUntil(u64 globalticks);
|
||||
void GenerateFixedCode();
|
||||
|
||||
void DumpJit();
|
||||
|
||||
void CompileDelaySlot(int flags);
|
||||
void Compile(u32 em_address); // Compiles a block at current MIPS PC
|
||||
const u8 *DoJit(u32 em_address, JitBlock *b);
|
||||
|
||||
PpcJitOptions jo;
|
||||
PpcJitState js;
|
||||
|
||||
PpcRegCache gpr;
|
||||
//PpcRegCacheFPU fpr;
|
||||
|
||||
MIPSState *mips_;
|
||||
|
||||
JitBlockCache *GetBlockCache() { return &blocks; }
|
||||
|
||||
public:
|
||||
// Code pointers
|
||||
const u8 *enterCode;
|
||||
|
||||
const u8 *outerLoop;
|
||||
const u8 *outerLoopPCInR0;
|
||||
const u8 *dispatcherCheckCoreState;
|
||||
const u8 *dispatcherPCInR0;
|
||||
const u8 *dispatcher;
|
||||
const u8 *dispatcherNoCheck;
|
||||
|
||||
const u8 *breakpointBailout;
|
||||
|
||||
};
|
||||
|
||||
typedef void (Jit::*MIPSCompileFunc)(u32 opcode);
|
||||
|
||||
} // namespace MIPSComp
|
||||
|
313
Core/MIPS/PPC/PpcRegCache.cpp
Normal file
313
Core/MIPS/PPC/PpcRegCache.cpp
Normal file
|
@ -0,0 +1,313 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <PpcEmitter.h>
|
||||
#include "PpcRegCache.h"
|
||||
#include "PpcJit.h"
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
// Remembers the MIPS state and JIT options. The emitter is attached
// separately (see Init).
PpcRegCache::PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options)
	: mips_(mips)
	, options_(options)
{
}
|
||||
|
||||
// Attaches the emitter that the cache uses to generate loads and stores.
void PpcRegCache::Init(PPCXEmitter *emitter)
{
	emit_ = emitter;
}
|
||||
|
||||
// Resets the cache: every host register becomes free and clean, every MIPS
// register is marked as living in memory with no spill lock. `stats` is not
// used.
void PpcRegCache::Start(MIPSAnalyst::AnalysisResults &stats)
{
	for (int host = 0; host < NUM_PPCREG; ++host) {
		ar[host].mipsReg = -1;
		ar[host].isDirty = false;
	}

	for (int guest = 0; guest < NUM_MIPSREG; ++guest) {
		mr[guest].loc = ML_MEM;
		mr[guest].reg = INVALID_REG;
		mr[guest].imm = -1;
		mr[guest].spillLock = false;
	}
}
|
||||
|
||||
// Returns the ordered list of host registers the allocator may hand out and
// stores its length in `count`.
//
// Both option settings currently yield the same list, R19..R31; R14..R18 are
// commented out of it.
//
// The element count is derived from the array's own element size. The
// original divided by sizeof(const int), which is only right if PPCReg
// happens to be int-sized.
const PPCReg *PpcRegCache::GetMIPSAllocationOrder(int &count) {
	if (options_->downcountInRegister) {
		static const PPCReg allocationOrder[] = {
			/*R14, R15, R16, R17, R18, */R19,
			R20, R21, R22, R23, R24, R25,
			R26, R27, R28, R29, R30, R31,
		};
		count = sizeof(allocationOrder) / sizeof(allocationOrder[0]);
		return allocationOrder;
	} else {
		static const PPCReg allocationOrder2[] = {
			/*R14, R15, R16, R17, R18,*/ R19,
			R20, R21, R22, R23, R24, R25,
			R26, R27, R28, R29, R30, R31,
		};
		count = sizeof(allocationOrder2) / sizeof(allocationOrder2[0]);
		return allocationOrder2;
	}
}
|
||||
|
||||
// Currently a no-op: the flushes of R2, R3 and R12 are commented out.
// NOTE(review): the original remark "R4-R11 are preserved. Others need
// flushing." reads like it was written for another backend - confirm it
// against the PPC calling convention before enabling anything here.
void PpcRegCache::FlushBeforeCall()
{
	/*
	FlushPpcReg(R2);
	FlushPpcReg(R3);
	FlushPpcReg(R12);
	*/
}
|
||||
|
||||
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
|
||||
// round robin or FIFO or something.
|
||||
// Ensures mipsReg lives in a host register and returns that register.
//
// mapFlags:
//   MAP_DIRTY  - mark the host register dirty.
//   MAP_NOINIT - do not load the current value (caller will overwrite it).
//
// If the register is already mapped only the dirty flag is updated. Otherwise
// the first free register in allocation order is taken; when none is free,
// the first register whose MIPS owner is not spill-locked is flushed and the
// search is repeated. Returns INVALID_REG (after logging) when everything is
// spill-locked.
PPCReg PpcRegCache::MapReg(MIPSReg mipsReg, int mapFlags) {
	const bool wantDirty = (mapFlags & MAP_DIRTY) ? true : false;
	const bool skipLoad = (mapFlags & MAP_NOINIT) ? true : false;

	// Already mapped? MAP_NOINIT needs no handling here: anyone mapping with
	// it is assumed to write a known value right away.
	if (mr[mipsReg].loc == ML_PPCREG) {
		if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
			ERROR_LOG(HLE, "Register mapping out of sync! %i", mipsReg);
		}
		if (wantDirty) {
			ar[mr[mipsReg].reg].isDirty = true;
		}
		return (PPCReg)mr[mipsReg].reg;
	}

	// Not mapped, so a PPC register has to be allocated.
	int allocCount;
	const PPCReg *allocOrder = GetMIPSAllocationOrder(allocCount);

	for (;;) {
		// Pass 1: take the first free register.
		for (int i = 0; i < allocCount; i++) {
			const int reg = allocOrder[i];
			if (ar[reg].mipsReg != -1)
				continue;

			// Free. Grab it, and load the value into it (if requested).
			ar[reg].isDirty = wantDirty;
			if (!skipLoad) {
				if (mr[mipsReg].loc == ML_MEM) {
					if (mipsReg != 0) {
						emit_->LWZ((PPCReg)reg, CTXREG, GetMipsRegOffset(mipsReg));
					} else {
						// The zero register never needs a memory access.
						emit_->MOVI2R((PPCReg)reg, 0);
					}
				} else if (mr[mipsReg].loc == ML_IMM) {
					emit_->MOVI2R((PPCReg)reg, mr[mipsReg].imm);
					ar[reg].isDirty = true;  // IMM is always dirty.
				}
			}
			ar[reg].mipsReg = mipsReg;
			mr[mipsReg].loc = ML_PPCREG;
			mr[mipsReg].reg = (PPCReg)reg;
			return (PPCReg)reg;
		}

		// Pass 2: nothing free, pick a victim to spill.
		// TODO: Use age or something to choose which register to spill?
		// TODO: Spill dirty regs first? or opposite?
		int victim = -1;
		for (int i = 0; i < allocCount; i++) {
			const int reg = allocOrder[i];
			if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock)
				continue;
			victim = reg;
			break;
		}

		if (victim == -1)
			break;

		// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, victim);
		FlushPpcReg((PPCReg)victim);
		// Loop back: the flushed register is now free.
	}

	// Uh oh, we have all them spilllocked....
	ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
	return INVALID_REG;
}
|
||||
|
||||
// Maps two registers for reading. Both are spill-locked while mapping so
// that mapping the second cannot evict the first.
void PpcRegCache::MapInIn(MIPSReg rd, MIPSReg rs)
{
	SpillLock(rd, rs);

	MapReg(rd);
	MapReg(rs);

	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd as a dirty destination and rs as a source.
// rd's old value is not loaded when avoidLoad is set and rd differs from rs.
void PpcRegCache::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad)
{
	SpillLock(rd, rs);

	int rdFlags = MAP_DIRTY;
	if (avoidLoad && rd != rs)
		rdFlags |= MAP_NOINIT;

	MapReg(rd, rdFlags);
	MapReg(rs);

	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd as a dirty destination and rs, rt as sources.
// rd's old value is not loaded when avoidLoad is set and rd is neither
// source.
void PpcRegCache::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad)
{
	SpillLock(rd, rs, rt);

	const bool rdIsSource = (rd == rs || rd == rt);
	int rdFlags = MAP_DIRTY;
	if (avoidLoad && !rdIsSource)
		rdFlags |= MAP_NOINIT;

	MapReg(rd, rdFlags);
	MapReg(rt);
	MapReg(rs);

	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd1 and rd2 as dirty destinations and rs, rt as sources.
// A destination's old value is not loaded when avoidLoad is set and that
// destination is neither source.
void PpcRegCache::MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad)
{
	SpillLock(rd1, rd2, rs, rt);

	int flags1 = MAP_DIRTY;
	if (avoidLoad && rd1 != rs && rd1 != rt)
		flags1 |= MAP_NOINIT;

	int flags2 = MAP_DIRTY;
	if (avoidLoad && rd2 != rs && rd2 != rt)
		flags2 |= MAP_NOINIT;

	MapReg(rd1, flags1);
	MapReg(rd2, flags2);
	MapReg(rt);
	MapReg(rs);

	ReleaseSpillLocks();
}
|
||||
|
||||
// Frees host register r. If it holds a dirty MIPS register value, that value
// is first written back to the MIPS context. Does nothing when r is unmapped.
//
// The original re-tested `mipsReg != -1` after the early return, leaving an
// unreachable "Dirty but no mipsreg?" branch; that dead branch is removed.
void PpcRegCache::FlushPpcReg(PPCReg r) {
	if (ar[r].mipsReg == -1) {
		// Nothing to do, reg not mapped.
		return;
	}

	const MIPSReg guest = ar[r].mipsReg;
	if (ar[r].isDirty && mr[guest].loc == ML_PPCREG)
		emit_->STW(r, CTXREG, GetMipsRegOffset(guest));

	// IMMs won't be in a host reg, so the value now lives in memory.
	mr[guest].loc = ML_MEM;
	mr[guest].reg = INVALID_REG;
	mr[guest].imm = 0;

	ar[r].isDirty = false;
	ar[r].mipsReg = -1;
}
|
||||
|
||||
// Writes MIPS register r back to the MIPS context (if needed) and marks it
// as living in memory.
//
//   ML_IMM    - the immediate is materialised in SREG and stored.
//   ML_PPCREG - the host register is stored if dirty, then released.
//   ML_MEM    - nothing to emit.
//
// If a register is marked ML_PPCREG but has no valid host register, the
// error is logged and the host-register table is left alone; the original
// went on to index ar[] with INVALID_REG.
void PpcRegCache::FlushR(MIPSReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is always "dirty".
		emit_->MOVI2R(SREG, mr[r].imm);
		emit_->STW(SREG, CTXREG, GetMipsRegOffset(r));
		break;

	case ML_PPCREG:
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad PpcReg");
		} else {
			if (ar[mr[r].reg].isDirty) {
				emit_->STW((PPCReg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
				ar[mr[r].reg].isDirty = false;
			}
			ar[mr[r].reg].mipsReg = -1;
		}
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		//BAD
		break;
	}

	mr[r].loc = ML_MEM;
	mr[r].reg = INVALID_REG;
	mr[r].imm = 0;
}
|
||||
|
||||
void PpcRegCache::FlushAll() {
|
||||
for (int i = 0; i < NUM_MIPSREG; i++) {
|
||||
FlushR(i);
|
||||
}
|
||||
// Sanity check
|
||||
for (int i = 0; i < NUM_PPCREG; i++) {
|
||||
if (ar[i].mipsReg != -1) {
|
||||
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Records that MIPS register r now holds the compile-time constant immVal.
// Any host register caching r is released without a write-back, because the
// old value is dead.
//
// Requests targeting register 0 are logged and ignored. The original logged
// and then recorded the immediate anyway, so a later flush would have stored
// it into the zero register's slot. IsImm(0)/GetImm(0) already report the
// constant 0 without consulting the table.
void PpcRegCache::SetImm(MIPSReg r, u32 immVal) {
	if (r == 0) {
		ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
		return;
	}

	// Zap existing value if cached in a reg
	if (mr[r].loc == ML_PPCREG) {
		ar[mr[r].reg].mipsReg = -1;
		ar[mr[r].reg].isDirty = false;
	}
	mr[r].loc = ML_IMM;
	mr[r].imm = immVal;
	mr[r].reg = INVALID_REG;
}
|
||||
|
||||
// True when r's value is a compile-time constant. Register 0 always is.
bool PpcRegCache::IsImm(MIPSReg r) const
{
	return r == 0 || mr[r].loc == ML_IMM;
}
|
||||
|
||||
// Returns the constant recorded for r (0 for register 0). If r is not
// currently an immediate, an error is logged and the stale imm field is
// returned anyway.
u32 PpcRegCache::GetImm(MIPSReg r) const
{
	if (r == 0)
		return 0;

	const bool isImmediate = (mr[r].loc == ML_IMM);
	if (!isImmediate) {
		ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
	}
	return mr[r].imm;
}
|
||||
|
||||
// Returns the byte offset of MIPS register r within the MIPS context.
// Registers below 32 are laid out as consecutive 4-byte slots; HI and LO map
// to their MIPSState fields. Anything else logs an error and yields 0.
int PpcRegCache::GetMipsRegOffset(MIPSReg r)
{
	if (r < 32)
		return r * 4;

	if (r == MIPSREG_HI)
		return offsetof(MIPSState, hi);
	if (r == MIPSREG_LO)
		return offsetof(MIPSState, lo);

	ERROR_LOG(JIT, "bad mips register %i", r);
	return 0;  // or what?
}
|
||||
|
||||
// Marks up to four MIPS registers as not spillable. r1 is always locked;
// r2..r4 are skipped when passed as -1.
void PpcRegCache::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4)
{
	mr[r1].spillLock = true;

	if (r2 != -1) {
		mr[r2].spillLock = true;
	}
	if (r3 != -1) {
		mr[r3].spillLock = true;
	}
	if (r4 != -1) {
		mr[r4].spillLock = true;
	}
}
|
||||
|
||||
void PpcRegCache::ReleaseSpillLocks() {
|
||||
for (int i = 0; i < NUM_MIPSREG; i++) {
|
||||
mr[i].spillLock = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Clears the spill lock on a single MIPS register.
void PpcRegCache::ReleaseSpillLock(MIPSReg reg) {
	RegMIPS &entry = mr[reg];
	entry.spillLock = false;
}
|
||||
|
||||
// Returns the PPC register currently holding the given MIPS register.
// If the MIPS register is not mapped to a PPC register, an error is logged
// (including the current compiler PC) and INVALID_REG is returned.
PPCReg PpcRegCache::R(int mipsReg) {
	if (mr[mipsReg].loc != ML_PPCREG) {
		ERROR_LOG(JIT, "Reg %i not in ppc reg. compilerPC = %08x", mipsReg, compilerPC_);
		return INVALID_REG;  // BAAAD
	}
	return (PPCReg)mr[mipsReg].reg;
}
|
156
Core/MIPS/PPC/PpcRegCache.h
Normal file
156
Core/MIPS/PPC/PpcRegCache.h
Normal file
|
@ -0,0 +1,156 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
|
||||
/**
|
||||
PPC reg cache based on arm version
|
||||
**/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../MIPS.h"
|
||||
#include "../MIPSAnalyst.h"
|
||||
#include "ppcEmitter.h"
|
||||
|
||||
using namespace PpcGen;
|
||||
|
||||
// R2 to R8: mapped MIPS regs
|
||||
// R9 = code pointers
|
||||
// R10 = MIPS context
|
||||
// R11 = base pointer
|
||||
|
||||
|
||||
// R18 to R31: mapped MIPS regs
|
||||
// R14 = MIPS context
|
||||
// R15 = downcount register
|
||||
// R16 = code pointer
|
||||
// R17 = base pointer
|
||||
|
||||
#if 1
|
||||
#define CTXREG (R14)
|
||||
#define DCNTREG (R15)
|
||||
#define CODEREG (R16)
|
||||
#define BASEREG (R17)
|
||||
#else
|
||||
#define CTXREG (R6)
|
||||
#define DCNTREG (R7)
|
||||
#define CODEREG (R8)
|
||||
#define BASEREG (R9)
|
||||
#endif
|
||||
|
||||
|
||||
// Safe to use this as scratch regs ?
|
||||
#define SREG (R5)
|
||||
#define FLAGREG (R18)
|
||||
|
||||
// Special MIPS registers: cache indices that follow the 32 general-purpose
// registers, plus the total number of cache slots.
enum {
	MIPSREG_HI = 32,
	MIPSREG_LO = MIPSREG_HI + 1,
	TOTAL_MAPPABLE_MIPSREGS = MIPSREG_LO + 1,
};
|
||||
|
||||
typedef int MIPSReg;
|
||||
|
||||
// Per-PPC-register bookkeeping.
struct RegPPC {
	// Index of the MIPS register held here; -1 means no MIPS register is attached.
	int mipsReg;
	// True when the register must be written back.
	bool isDirty;
};
|
||||
|
||||
// Where the current value of a MIPS register lives.
enum RegMIPSLoc {
	ML_IMM = 0,     // known constant (see RegMIPS::imm)
	ML_PPCREG = 1,  // held in a PPC register (see RegMIPS::reg)
	ML_MEM = 2,     // back in its slot in the CPU context struct
};
|
||||
|
||||
struct RegMIPS {
|
||||
// Where is this MIPS register?
|
||||
RegMIPSLoc loc;
|
||||
// Data (only one of these is used, depending on loc. Could make a union).
|
||||
u32 imm;
|
||||
PPCReg reg; // reg index
|
||||
bool spillLock; // if true, this register cannot be spilled.
|
||||
// If loc == ML_MEM, it's back in its location in the CPU context struct.
|
||||
};
|
||||
|
||||
#undef MAP_DIRTY
|
||||
#undef MAP_NOINIT
|
||||
// Bit flags for the mapFlags argument of PpcRegCache::MapReg().
// Initing is the default so the flag is reversed (MAP_NOINIT opts out).
enum {
	MAP_DIRTY = 1 << 0,
	MAP_NOINIT = 1 << 1,
};
|
||||
|
||||
namespace MIPSComp {
|
||||
struct PpcJitOptions;
|
||||
}
|
||||
|
||||
class PpcRegCache
|
||||
{
|
||||
public:
|
||||
PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options);
|
||||
~PpcRegCache() {}
|
||||
|
||||
void Init(PPCXEmitter *emitter);
|
||||
void Start(MIPSAnalyst::AnalysisResults &stats);
|
||||
|
||||
// Protect the arm register containing a MIPS register from spilling, to ensure that
|
||||
// it's being kept allocated.
|
||||
void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
|
||||
void ReleaseSpillLock(MIPSReg reg);
|
||||
void ReleaseSpillLocks();
|
||||
|
||||
void SetImm(MIPSReg reg, u32 immVal);
|
||||
bool IsImm(MIPSReg reg) const;
|
||||
u32 GetImm(MIPSReg reg) const;
|
||||
|
||||
// Returns an ARM register containing the requested MIPS register.
|
||||
PPCReg MapReg(MIPSReg reg, int mapFlags = 0);
|
||||
void MapInIn(MIPSReg rd, MIPSReg rs);
|
||||
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
|
||||
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
|
||||
void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
|
||||
void FlushPpcReg(PPCReg r);
|
||||
void FlushR(MIPSReg r);
|
||||
void FlushBeforeCall();
|
||||
void FlushAll();
|
||||
|
||||
PPCReg R(int preg); // Returns a cached register
|
||||
|
||||
void SetEmitter(PPCXEmitter *emitter) { emit_ = emitter; }
|
||||
|
||||
// For better log output only.
|
||||
void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }
|
||||
|
||||
int GetMipsRegOffset(MIPSReg r);
|
||||
|
||||
private:
|
||||
const PPCReg *GetMIPSAllocationOrder(int &count);
|
||||
|
||||
MIPSState *mips_;
|
||||
MIPSComp::PpcJitOptions *options_;
|
||||
PPCXEmitter *emit_;
|
||||
u32 compilerPC_;
|
||||
|
||||
enum {
|
||||
NUM_PPCREG = 32,
|
||||
NUM_MIPSREG = TOTAL_MAPPABLE_MIPSREGS,
|
||||
};
|
||||
|
||||
RegPPC ar[NUM_MIPSREG];
|
||||
RegMIPS mr[NUM_MIPSREG];
|
||||
};
|
|
@ -176,7 +176,7 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
|
|||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
if (rt == 0)
|
||||
if (gpr.IsImmediate(rt) && gpr.GetImmediate32(rt) == 0)
|
||||
{
|
||||
gpr.KillImmediate(rs, true, false);
|
||||
CMP(32, gpr.R(rs), Imm32(0));
|
||||
|
|
|
@ -136,23 +136,23 @@ void WriteUnchecked_U32(const u32 _Data, const u32 _Address);
|
|||
#else
|
||||
|
||||
inline u32 ReadUnchecked_U32(const u32 address) {
|
||||
#if defined(_M_IX86) || defined(_M_ARM32)
|
||||
return (*(u32 *)(base + (address & MEMVIEW32_MASK)));
|
||||
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
|
||||
return *(u32_le *)(base + (address & MEMVIEW32_MASK));
|
||||
#else
|
||||
return (*(u32 *)(base + address));
|
||||
return *(u32_le *)(base + address);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline u16 ReadUnchecked_U16(const u32 address) {
|
||||
#if defined(_M_IX86) || defined(_M_ARM32)
|
||||
return (*(u16 *)(base + (address & MEMVIEW32_MASK)));
|
||||
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
|
||||
return *(u16_le *)(base + (address & MEMVIEW32_MASK));
|
||||
#else
|
||||
return (*(u16 *)(base + address));
|
||||
return *(u16_le *)(base + address);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline u8 ReadUnchecked_U8(const u32 address) {
|
||||
#if defined(_M_IX86) || defined(_M_ARM32)
|
||||
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
|
||||
return (*(u8 *)(base + (address & MEMVIEW32_MASK)));
|
||||
#else
|
||||
return (*(u8 *)(base + address));
|
||||
|
@ -160,23 +160,23 @@ inline u8 ReadUnchecked_U8(const u32 address) {
|
|||
}
|
||||
|
||||
inline void WriteUnchecked_U32(u32 data, u32 address) {
|
||||
#if defined(_M_IX86) || defined(_M_ARM32)
|
||||
(*(u32 *)(base + (address & MEMVIEW32_MASK))) = data;
|
||||
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
|
||||
*(u32_le *)(base + (address & MEMVIEW32_MASK)) = data;
|
||||
#else
|
||||
(*(u32 *)(base + address)) = data;
|
||||
*(u32_le *)(base + address) = data;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void WriteUnchecked_U16(u16 data, u32 address) {
|
||||
#if defined(_M_IX86) || defined(_M_ARM32)
|
||||
(*(u16 *)(base + (address & MEMVIEW32_MASK))) = data;
|
||||
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
|
||||
*(u16_le *)(base + (address & MEMVIEW32_MASK)) = data;
|
||||
#else
|
||||
(*(u16 *)(base + address)) = data;
|
||||
*(u16_le *)(base + address) = data;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void WriteUnchecked_U8(u8 data, u32 address) {
|
||||
#if defined(_M_IX86) || defined(_M_ARM32)
|
||||
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
|
||||
(*(u8 *)(base + (address & MEMVIEW32_MASK))) = data;
|
||||
#else
|
||||
(*(u8 *)(base + address)) = data;
|
||||
|
|
|
@ -156,22 +156,22 @@ u8 Read_U8(const u32 _Address)
|
|||
|
||||
u16 Read_U16(const u32 _Address)
|
||||
{
|
||||
u16 _var = 0;
|
||||
ReadFromHardware<u16>(_var, _Address);
|
||||
u16_le _var = 0;
|
||||
ReadFromHardware<u16_le>(_var, _Address);
|
||||
return (u16)_var;
|
||||
}
|
||||
|
||||
u32 Read_U32(const u32 _Address)
|
||||
{
|
||||
u32 _var = 0;
|
||||
ReadFromHardware<u32>(_var, _Address);
|
||||
u32_le _var = 0;
|
||||
ReadFromHardware<u32_le>(_var, _Address);
|
||||
return _var;
|
||||
}
|
||||
|
||||
u64 Read_U64(const u32 _Address)
|
||||
{
|
||||
u64 _var = 0;
|
||||
ReadFromHardware<u64>(_var, _Address);
|
||||
u64_le _var = 0;
|
||||
ReadFromHardware<u64_le>(_var, _Address);
|
||||
return _var;
|
||||
}
|
||||
|
||||
|
@ -190,20 +190,19 @@ void Write_U8(const u8 _Data, const u32 _Address)
|
|||
WriteToHardware<u8>(_Address, _Data);
|
||||
}
|
||||
|
||||
|
||||
void Write_U16(const u16 _Data, const u32 _Address)
|
||||
{
|
||||
WriteToHardware<u16>(_Address, _Data);
|
||||
WriteToHardware<u16_le>(_Address, _Data);
|
||||
}
|
||||
|
||||
void Write_U32(const u32 _Data, const u32 _Address)
|
||||
{
|
||||
WriteToHardware<u32>(_Address, _Data);
|
||||
WriteToHardware<u32_le>(_Address, _Data);
|
||||
}
|
||||
|
||||
void Write_U64(const u64 _Data, const u32 _Address)
|
||||
{
|
||||
WriteToHardware<u64>(_Address, _Data);
|
||||
WriteToHardware<u64_le>(_Address, _Data);
|
||||
}
|
||||
|
||||
#ifdef SAFE_MEMORY
|
||||
|
@ -217,15 +216,15 @@ u8 ReadUnchecked_U8(const u32 _Address)
|
|||
|
||||
u16 ReadUnchecked_U16(const u32 _Address)
|
||||
{
|
||||
u16 _var = 0;
|
||||
ReadFromHardware<u16>(_var, _Address);
|
||||
u16_le _var = 0;
|
||||
ReadFromHardware<u16_le>(_var, _Address);
|
||||
return _var;
|
||||
}
|
||||
|
||||
u32 ReadUnchecked_U32(const u32 _Address)
|
||||
{
|
||||
u32 _var = 0;
|
||||
ReadFromHardware<u32>(_var, _Address);
|
||||
u32_le _var = 0;
|
||||
ReadFromHardware<u32_le>(_var, _Address);
|
||||
return _var;
|
||||
}
|
||||
|
||||
|
@ -236,12 +235,12 @@ void WriteUnchecked_U8(const u8 _iValue, const u32 _Address)
|
|||
|
||||
void WriteUnchecked_U16(const u16 _iValue, const u32 _Address)
|
||||
{
|
||||
WriteToHardware<u16>(_Address, _iValue);
|
||||
WriteToHardware<u16_le>(_Address, _iValue);
|
||||
}
|
||||
|
||||
void WriteUnchecked_U32(const u32 _iValue, const u32 _Address)
|
||||
{
|
||||
WriteToHardware<u32>(_Address, _iValue);
|
||||
WriteToHardware<u32_le>(_Address, _iValue);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "GPU/GLES/Framebuffer.h"
|
||||
#include "Core/Config.h"
|
||||
|
||||
#include "ext/xxhash.h"
|
||||
#include "native/ext/cityhash/city.h"
|
||||
|
||||
#ifdef _M_SSE
|
||||
|
@ -892,9 +893,7 @@ void TextureCache::UpdateCurrentClut() {
|
|||
// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
|
||||
const u32 clutExtendedBytes = clutTotalBytes_ + clutBaseBytes;
|
||||
|
||||
// QuickClutHash is not quite good enough apparently.
|
||||
// clutHash_ = QuickClutHash((const u8 *)clutBufRaw_, clutExtendedBytes);
|
||||
clutHash_ = CityHash32((const char *)clutBufRaw_, clutExtendedBytes);
|
||||
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
|
||||
|
||||
// Avoid a copy when we don't need to convert colors.
|
||||
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "native/gfx_es2/gl_state.h"
|
||||
#include "native/ext/cityhash/city.h"
|
||||
#include "ext/xxhash.h"
|
||||
|
||||
#include "GPU/Math3D.h"
|
||||
#include "GPU/GPUState.h"
|
||||
|
@ -1034,14 +1035,14 @@ u32 TransformDrawEngine::ComputeHash() {
|
|||
// It is really very expensive to check all the vertex data so often.
|
||||
for (int i = 0; i < numDrawCalls; i++) {
|
||||
if (!drawCalls[i].inds) {
|
||||
fullhash += CityHash32((const char *)drawCalls[i].verts, vertexSize * drawCalls[i].vertexCount);
|
||||
fullhash += XXH32((const char *)drawCalls[i].verts, vertexSize * drawCalls[i].vertexCount, 0x1DE8CAC4);
|
||||
} else {
|
||||
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
|
||||
// we do when drawing.
|
||||
fullhash += CityHash32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
|
||||
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound));
|
||||
fullhash += XXH32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
|
||||
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound), 0x029F3EE1);
|
||||
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
|
||||
fullhash += CityHash32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount);
|
||||
fullhash += XXH32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount, 0x955FD1CA);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ SOURCES += ../Core/*.cpp \ # Core
|
|||
../GPU/Null/NullGpu.cpp \
|
||||
../GPU/GLES/*.cpp \
|
||||
../ext/libkirk/*.c \ # Kirk
|
||||
../ext/xxhash.c \ # xxHash
|
||||
../ext/xbrz/*.cpp # XBRZ
|
||||
|
||||
HEADERS += ../Core/*.h \
|
||||
|
|
|
@ -382,7 +382,7 @@ void GameBrowser::Refresh() {
|
|||
b->OnHoldClick.Handle(this, &GameBrowser::GameButtonHoldClick);
|
||||
}
|
||||
|
||||
if (!lastText_.empty()) {
|
||||
if (!lastText_.empty() && gameButtons.empty()) {
|
||||
Add(new Spacer());
|
||||
Add(new Choice(lastText_, new UI::LinearLayoutParams(UI::WRAP_CONTENT, UI::WRAP_CONTENT)))->OnClick.Handle(this, &GameBrowser::LastClick);
|
||||
}
|
||||
|
|
|
@ -360,6 +360,9 @@ void NativeInit(int argc, const char *argv[],
|
|||
logman->SetLogLevel(LogTypes::G3D, LogTypes::LERROR);
|
||||
INFO_LOG(BOOT, "Logger inited.");
|
||||
#else
|
||||
if (g_Config.currentDirectory.empty()) {
|
||||
g_Config.currentDirectory = File::GetExeDirectory();
|
||||
}
|
||||
g_Config.memCardDirectory = "MemStick/";
|
||||
#endif
|
||||
|
||||
|
|
|
@ -153,6 +153,7 @@ LOCAL_SRC_FILES := \
|
|||
$(SRC)/ext/snappy/snappy-c.cpp \
|
||||
$(SRC)/ext/snappy/snappy.cpp \
|
||||
$(SRC)/ext/xbrz/xbrz.cpp \
|
||||
$(SRC)/ext/xxhash.c \
|
||||
$(SRC)/Common/Crypto/md5.cpp \
|
||||
$(SRC)/Common/KeyMap.cpp \
|
||||
$(SRC)/Common/LogManager.cpp \
|
||||
|
|
475
ext/xxhash.c
Normal file
475
ext/xxhash.c
Normal file
|
@ -0,0 +1,475 @@
|
|||
/*
|
||||
xxHash - Fast Hash algorithm
|
||||
Copyright (C) 2012-2013, Yann Collet.
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- xxHash source repository : http://code.google.com/p/xxhash/
|
||||
*/
|
||||
|
||||
|
||||
//**************************************
|
||||
// Tuning parameters
|
||||
//**************************************
|
||||
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
|
||||
// For other CPUs, the compiler will be more cautious and insert extra code to ensure that aligned access is respected.
|
||||
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
|
||||
// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
|
||||
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
|
||||
//# define XXH_USE_UNALIGNED_ACCESS 1
|
||||
#endif
|
||||
|
||||
// XXH_ACCEPT_NULL_INPUT_POINTER :
|
||||
// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
|
||||
// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
|
||||
// This option has a very small performance cost (only measurable on small inputs).
|
||||
// By default, this option is disabled. To enable it, uncomment below define :
|
||||
//#define XXH_ACCEPT_NULL_INPUT_POINTER 1
|
||||
|
||||
// XXH_FORCE_NATIVE_FORMAT :
|
||||
// By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
|
||||
// Results are therefore identical for little-endian and big-endian CPU.
|
||||
// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
||||
// Should endian-independence be of no importance for your application, you may set the #define below to 1.
|
||||
// It will improve speed for Big-endian CPU.
|
||||
// This option has no impact on Little_Endian CPU.
|
||||
#define XXH_FORCE_NATIVE_FORMAT 1
|
||||
|
||||
|
||||
//**************************************
|
||||
// Compiler Specific Options
|
||||
//**************************************
|
||||
// Disable some Visual warning messages
|
||||
#ifdef _MSC_VER // Visual Studio
|
||||
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER // Visual Studio
|
||||
# define forceinline static __forceinline
|
||||
#else
|
||||
# ifdef __GNUC__
|
||||
# define forceinline static inline __attribute__((always_inline))
|
||||
# else
|
||||
# define forceinline static inline
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
//**************************************
|
||||
// Includes & Memory related functions
|
||||
//**************************************
|
||||
#include "xxhash.h"
|
||||
// Modify the local functions below should you wish to use some other memory related routines
|
||||
// for malloc(), free()
|
||||
#include <stdlib.h>
|
||||
// Allocation hook: forwards to malloc().
forceinline void* XXH_malloc(size_t s)
{
    return malloc(s);
}
|
||||
// Deallocation hook: forwards to free().
forceinline void XXH_free(void* p)
{
    free(p);
}
|
||||
// for memcpy()
|
||||
#include <string.h>
|
||||
// Copy hook: forwards to memcpy() and returns its result (dest).
forceinline void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    return memcpy(dest, src, size);
}
|
||||
|
||||
|
||||
//**************************************
|
||||
// Basic Types
|
||||
//**************************************
|
||||
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
|
||||
# include <stdint.h>
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint16_t U16;
|
||||
typedef uint32_t U32;
|
||||
typedef int32_t S32;
|
||||
typedef uint64_t U64;
|
||||
#else
|
||||
typedef unsigned char BYTE;
|
||||
typedef unsigned short U16;
|
||||
typedef unsigned int U32;
|
||||
typedef signed int S32;
|
||||
typedef unsigned long long U64;
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
|
||||
# define _PACKED __attribute__ ((packed))
|
||||
#else
|
||||
# define _PACKED
|
||||
#endif
|
||||
|
||||
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
|
||||
# ifdef __IBMC__
|
||||
# pragma pack(1)
|
||||
# else
|
||||
# pragma pack(push, 1)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
typedef struct _U32_S { U32 v; } _PACKED U32_S;
|
||||
|
||||
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
|
||||
# pragma pack(pop)
|
||||
#endif
|
||||
|
||||
#define A32(x) (((U32_S *)(x))->v)
|
||||
|
||||
|
||||
//***************************************
|
||||
// Compiler-specific Functions and Macros
|
||||
//***************************************
|
||||
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
|
||||
// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
|
||||
#if defined(_MSC_VER)
|
||||
# define XXH_rotl32(x,r) _rotl(x,r)
|
||||
#else
|
||||
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) // Visual Studio
|
||||
# define XXH_swap32 _byteswap_ulong
|
||||
#elif GCC_VERSION >= 403
|
||||
# define XXH_swap32 __builtin_bswap32
|
||||
#else
|
||||
// Portable 32-bit byte swap, used when no compiler intrinsic is available.
static inline U32 XXH_swap32 (U32 x)
{
    const U32 byte3 = (x << 24) & 0xff000000;   // lowest byte moves to the top
    const U32 byte2 = (x <<  8) & 0x00ff0000;
    const U32 byte1 = (x >>  8) & 0x0000ff00;
    const U32 byte0 = (x >> 24) & 0x000000ff;   // highest byte moves to the bottom
    return byte3 | byte2 | byte1 | byte0;
}
|
||||
#endif
|
||||
|
||||
|
||||
//**************************************
|
||||
// Constants
|
||||
//**************************************
|
||||
#define PRIME32_1 2654435761U
|
||||
#define PRIME32_2 2246822519U
|
||||
#define PRIME32_3 3266489917U
|
||||
#define PRIME32_4 668265263U
|
||||
#define PRIME32_5 374761393U
|
||||
|
||||
|
||||
//**************************************
|
||||
// Architecture Macros
|
||||
//**************************************
|
||||
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
|
||||
#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
|
||||
static const int one = 1;
|
||||
# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one))
|
||||
#endif
|
||||
|
||||
|
||||
//**************************************
|
||||
// Macros
|
||||
//**************************************
|
||||
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
|
||||
|
||||
|
||||
//****************************
|
||||
// Memory reads
|
||||
//****************************
|
||||
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
|
||||
|
||||
// Reads one 32-bit word from ptr.
// align  - XXH_unaligned reads through the A32() accessor; otherwise ptr is
//          dereferenced directly.
// endian - XXH_littleEndian returns the word as read; otherwise it is byte-swapped.
forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
{
    U32 word;

    if (align==XXH_unaligned)
        word = A32(ptr);
    else
        word = *ptr;

    if (endian==XXH_littleEndian)
        return word;
    return XXH_swap32(word);
}
|
||||
|
||||
// Convenience wrapper: reads a 32-bit word from ptr, always via the unaligned accessor.
forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian)
{
    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}
|
||||
|
||||
|
||||
//****************************
|
||||
// Simple Hash Functions
|
||||
//****************************
|
||||
// Core one-shot XXH32 implementation.
//
// input  - start of the data to hash
// len    - number of bytes at input
// seed   - hash seed
// endian - how 32-bit words are interpreted (see XXH_readLE32_align)
// align  - whether input may be read with direct 32-bit loads
//
// Returns the 32-bit hash of the input.
forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd;
    U32 h32;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    // A NULL input is hashed as an empty input. This must happen before bEnd
    // is derived, otherwise bEnd would still reflect the original pointer/length.
    if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
#endif

    bEnd = p + len;

    if (len>=16)
    {
        // Consume the input in 16-byte stripes, one 32-bit lane per accumulator.
        const BYTE* const limit = bEnd - 16;
        U32 v1 = seed + PRIME32_1 + PRIME32_2;
        U32 v2 = seed + PRIME32_2;
        U32 v3 = seed + 0;
        U32 v4 = seed - PRIME32_1;

        do
        {
            v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
            v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
            v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
            v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
        } while (p<=limit);

        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    }
    else
    {
        h32 = seed + PRIME32_5;
    }

    h32 += (U32) len;

    // Remaining whole 32-bit words. The pointer difference is used instead of
    // "bEnd-4" so that no pointer before the start of the buffer is ever formed.
    while (bEnd - p >= 4)
    {
        h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
        p+=4;
    }

    // Remaining single bytes.
    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
        p++;
    }

    // Final mixing of the accumulated value.
    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}
|
||||
|
||||
|
||||
// One-shot XXH32 hash of len bytes at input, using the given seed.
// Selects the aligned or unaligned read path from the address of input, and
// the endian handling from the detected CPU endianness and XXH_FORCE_NATIVE_FORMAT.
U32 XXH32(const void* input, int len, U32 seed)
{
#if 0
    // Simple version, good for code maintenance, but unfortunately slow for small inputs
    void* state = XXH32_init(seed);
    XXH32_update(state, input, len);
    return XXH32_digest(state);
#else
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USE_UNALIGNED_ACCESS)
    // Only a 4-byte aligned input may take the direct-load path; a misaligned
    // input must fall through to the unaligned reader below.
    if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
    {
        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
    else
        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
|
||||
|
||||
|
||||
//****************************
|
||||
// Advanced Hash Functions
|
||||
//****************************
|
||||
|
||||
struct XXH_state32_t
|
||||
{
|
||||
U64 total_len;
|
||||
U32 seed;
|
||||
U32 v1;
|
||||
U32 v2;
|
||||
U32 v3;
|
||||
U32 v4;
|
||||
int memsize;
|
||||
char memory[16];
|
||||
};
|
||||
|
||||
|
||||
int XXH32_sizeofState()
|
||||
{
|
||||
XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough
|
||||
return sizeof(struct XXH_state32_t);
|
||||
}
|
||||
|
||||
|
||||
// (Re)initialises a streaming state in caller-provided storage for the given seed.
// Always returns XXH_OK.
XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
{
    struct XXH_state32_t* const st = (struct XXH_state32_t*) state_in;

    // Nothing consumed and nothing buffered yet.
    st->total_len = 0;
    st->memsize   = 0;

    // Seed-derived starting values of the four accumulators.
    st->seed = seed;
    st->v1   = seed + PRIME32_1 + PRIME32_2;
    st->v2   = seed + PRIME32_2;
    st->v3   = seed + 0;
    st->v4   = seed - PRIME32_1;

    return XXH_OK;
}
|
||||
|
||||
|
||||
// Allocates and initialises a streaming state for the given seed.
// Returns the new state, or NULL if the allocation failed. The state is
// released by XXH32_digest().
void* XXH32_init (U32 seed)
{
    void* state = XXH_malloc (sizeof(struct XXH_state32_t));
    if (state == NULL) return NULL;   // out of memory: do not touch the state
    XXH32_resetState(state, seed);
    return state;
}
|
||||
|
||||
|
||||
// Feeds len bytes at input into the streaming state.
// Full 16-byte stripes are folded into the accumulators; any remainder is
// buffered in state->memory for the next call or for the digest.
// Returns XXH_OK, or XXH_ERROR for a NULL input when
// XXH_ACCEPT_NULL_INPUT_POINTER is defined.
forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
{
    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 16)   // fill in tmp buffer: still less than one full stripe
    {
        XXH_memcpy(state->memory + state->memsize, input, len);
        state->memsize +=  len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update: complete that stripe first
    {
        XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
        {
            const U32* p32 = (const U32*)state->memory;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
        }
        p += 16-state->memsize;
        state->memsize = 0;
    }

    // Process whole stripes directly from the input. The pointer difference is
    // used instead of "bEnd-16" so that no out-of-bounds pointer is formed
    // when fewer than 16 bytes remain.
    if (bEnd - p >= 16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;

        do
        {
            v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
            v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
            v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
            v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
        } while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)   // keep the leftover bytes for the next call
    {
        XXH_memcpy(state->memory, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
    }

    return XXH_OK;
}
|
||||
|
||||
XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
|
||||
else
|
||||
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Computes the hash of everything fed into the state so far, without
// modifying or releasing the state.
forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
{
    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
    const BYTE * p = (const BYTE*)state->memory;
    BYTE* bEnd = (BYTE*)state->memory + state->memsize;
    U32 h32;

    if (state->total_len >= 16)
    {
        // At least one full stripe was consumed: merge the four accumulators.
        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    }
    else
    {
        h32  = state->seed + PRIME32_5;
    }

    h32 += (U32) state->total_len;

    // Buffered whole 32-bit words. The pointer difference is used instead of
    // "bEnd-4" so that no pointer before the start of the buffer is formed.
    while (bEnd - p >= 4)
    {
        h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
    }

    // Buffered single bytes.
    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    // Final mixing of the accumulated value.
    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}
|
||||
|
||||
|
||||
U32 XXH32_intermediateDigest (void* state_in)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
|
||||
else
|
||||
return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
// Computes the final hash for the state and then frees the state.
// The state pointer must not be used after this call.
U32 XXH32_digest (void* state_in)
{
    const U32 result = XXH32_intermediateDigest(state_in);
    XXH_free(state_in);
    return result;
}
|
164
ext/xxhash.h
Normal file
164
ext/xxhash.h
Normal file
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
xxHash - Fast Hash algorithm
|
||||
Header File
|
||||
Copyright (C) 2012-2013, Yann Collet.
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- xxHash source repository : http://code.google.com/p/xxhash/
|
||||
*/
|
||||
|
||||
/* Notice extracted from xxHash homepage :
|
||||
|
||||
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
|
||||
It also successfully passes all tests from the SMHasher suite.
|
||||
|
||||
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
|
||||
|
||||
Name Speed Q.Score Author
|
||||
xxHash 5.4 GB/s 10
|
||||
CrapWow 3.2 GB/s 2 Andrew
|
||||
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
|
||||
SpookyHash 2.0 GB/s 10 Bob Jenkins
|
||||
SBox 1.4 GB/s 9 Bret Mulvey
|
||||
Lookup3 1.2 GB/s 9 Bob Jenkins
|
||||
SuperFastHash 1.2 GB/s 1 Paul Hsieh
|
||||
CityHash64 1.05 GB/s 10 Pike & Alakuijala
|
||||
FNV 0.55 GB/s 5 Fowler, Noll, Vo
|
||||
CRC32 0.43 GB/s 9
|
||||
MD5-32 0.33 GB/s 10 Ronald L. Rivest
|
||||
SHA1-32 0.28 GB/s 10
|
||||
|
||||
Q.Score is a measure of quality of the hash function.
|
||||
It depends on successfully passing SMHasher test set.
|
||||
10 is a perfect score.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
//****************************
|
||||
// Type
|
||||
//****************************
|
||||
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
|
||||
|
||||
|
||||
|
||||
//****************************
|
||||
// Simple Hash Functions
|
||||
//****************************
|
||||
|
||||
unsigned int XXH32 (const void* input, int len, unsigned int seed);
|
||||
|
||||
/*
|
||||
XXH32() :
|
||||
Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
|
||||
The memory between input & input+len must be valid (allocated and read-accessible).
|
||||
"seed" can be used to alter the result predictably.
|
||||
This function successfully passes all SMHasher tests.
|
||||
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
|
||||
Note that "len" is type "int", which means it is limited to 2^31-1.
|
||||
If your data is larger, use the advanced functions below.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
//****************************
|
||||
// Advanced Hash Functions
|
||||
//****************************
|
||||
|
||||
void* XXH32_init (unsigned int seed);
|
||||
XXH_errorcode XXH32_update (void* state, const void* input, int len);
|
||||
unsigned int XXH32_digest (void* state);
|
||||
|
||||
/*
|
||||
These functions calculate the xxhash of an input provided in several small packets,
|
||||
as opposed to an input provided as a single block.
|
||||
|
||||
It must be started with :
|
||||
void* XXH32_init()
|
||||
The function returns a pointer which holds the state of calculation.
|
||||
|
||||
This pointer must be provided as "void* state" parameter for XXH32_update().
|
||||
XXH32_update() can be called as many times as necessary.
|
||||
The user must provide a valid (allocated) input.
|
||||
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
|
||||
Note that "len" is type "int", which means it is limited to 2^31-1.
|
||||
If your data is larger, it is recommended to chunk your data into blocks
|
||||
of size for example 2^30 (1GB) to avoid any "int" overflow issue.
|
||||
|
||||
Finally, you can end the calculation anytime, by using XXH32_digest().
|
||||
This function returns the final 32-bits hash.
|
||||
You must provide the same "void* state" parameter created by XXH32_init().
|
||||
Memory will be freed by XXH32_digest().
|
||||
*/
|
||||
|
||||
|
||||
// Takes no arguments. Declared with (void) because in C an empty parameter list
// leaves the parameters unspecified instead of declaring "no parameters".
int XXH32_sizeofState(void);
|
||||
XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
|
||||
|
||||
#define XXH32_SIZEOFSTATE 48
|
||||
// Storage type for one XXH32 state: an array of long long whose element count is
// XXH32_SIZEOFSTATE bytes rounded up to a whole number of long long elements.
typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
|
||||
/*
|
||||
These functions allow user application to make its own allocation for state.
|
||||
|
||||
XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
|
||||
Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
|
||||
This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
|
||||
|
||||
For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
|
||||
use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
|
||||
*/
|
||||
|
||||
|
||||
unsigned int XXH32_intermediateDigest (void* state);
|
||||
/*
|
||||
This function does the same as XXH32_digest(), generating a 32-bit hash,
|
||||
but preserves the memory context (the state is not freed).
|
||||
This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
|
||||
To free memory context, use XXH32_digest(), or free().
|
||||
*/
|
||||
|
||||
|
||||
|
||||
//****************************
|
||||
// Deprecated function names
|
||||
//****************************
|
||||
// The following translations are provided to ease code transition
|
||||
// You are encouraged to no longer use these function names
|
||||
#define XXH32_feed XXH32_update
|
||||
#define XXH32_result XXH32_digest
|
||||
#define XXH32_getIntermediateResult XXH32_intermediateDigest
|
||||
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
Loading…
Add table
Reference in a new issue