Merge branch 'master' into better-controlmap

This commit is contained in:
Henrik Rydgard 2013-08-17 10:37:04 +02:00
commit c14fcfdae1
34 changed files with 3826 additions and 52 deletions

View file

@ -342,7 +342,7 @@ add_library(rg_etc1 STATIC
include_directories(native/ext/rg_etc1)
add_library(cityhash STATIC
native/ext/cityhash/city.cpp
native/ext/cityhash/city.cpp
native/ext/cityhash/city.h
native/ext/cityhash/citycrc.h
)
@ -688,6 +688,12 @@ add_library(xbrz STATIC
)
include_directories(ext/xbrz)
add_library(xxhash STATIC
ext/xxhash.c
ext/xxhash.h
)
include_directories(ext/xxhash)
set(CoreExtra)
set(CoreExtraLibs)
if(ARM)
@ -966,7 +972,7 @@ add_library(${CoreLibName} ${CoreLinkType}
$<TARGET_OBJECTS:GPU>
Globals.h
git-version.cpp)
target_link_libraries(${CoreLibName} Common native kirk cityhash xbrz
target_link_libraries(${CoreLibName} Common native kirk cityhash xbrz xxhash
${CoreExtraLibs} ${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
setup_target_project(${CoreLibName} Core)

View file

@ -169,7 +169,7 @@ private:
# elif defined __SSE3__
# define _M_SSE 0x300
# endif
#elif (_MSC_VER >= 1500) || __INTEL_COMPILER // Visual Studio 2008
#elif ((_MSC_VER >= 1500) || __INTEL_COMPILER) // Visual Studio 2008
# define _M_SSE 0x402
#endif

View file

@ -85,7 +85,9 @@ public:
return *this;
}
operator long() const { return (long)swap(); }
operator long() const { return (long)swap(); }
operator s8() const { return (s8)swap(); }
operator u8() const { return (u8)swap(); }
operator s16() const { return (s16)swap(); }
operator u16() const { return (u16)swap(); }
operator s32() const { return (s32)swap(); }

0
Common/ppcAbi.cpp Normal file
View file

508
Common/ppcEmitter.cpp Normal file
View file

@ -0,0 +1,508 @@
#include <xtl.h>
#include "ppcEmitter.h"
namespace PpcGen {

// ============================================================================
// Arithmetic / logical ops
// Each emitter assembles one fixed 32-bit PowerPC instruction and appends it
// to the code buffer via Write32(). Logical reg-reg ops (and/or/xor) encode
// the destination in the rA field (bits 16-20), per the PPC "rA, rS, rB" form.
// ============================================================================

void PPCXEmitter::ADD (PPCReg Rd, PPCReg Ra, PPCReg Rb) {
	u32 instr = (0x7C000214 | (Rd << 21) | (Ra << 16) | (Rb << 11));
	Write32(instr);
}

void PPCXEmitter::ADDI (PPCReg Rd, PPCReg Ra, short imm) {
	u32 instr = (0x38000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::ADDIS (PPCReg Rd, PPCReg Ra, short imm) {
	u32 instr = (0x3C000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::AND (PPCReg Rs, PPCReg Ra, PPCReg Rb) {
	u32 instr = (0x7C000038 | (Ra << 21) | (Rs << 16) | (Rb << 11));
	Write32(instr);
}

// NOTE(review): ANDI/ANDIS place their first parameter in the rS (source)
// field while AND places its first parameter in the rA (dest) field — the
// parameter naming is inconsistent between the two; verify call sites.
void PPCXEmitter::ANDI (PPCReg Rd, PPCReg Ra, unsigned short imm) {
	u32 instr = (0x70000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::ANDIS (PPCReg Rd, PPCReg Ra, unsigned short imm) {
	u32 instr = (0x74000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
	Write32(instr);
}

// ============================================================================
// Memory load/store operations
// ============================================================================

// li dest, imm  — encoded as addi dest, 0, imm
void PPCXEmitter::LI(PPCReg dest, unsigned short imm) {
	u32 instr = (0x38000000 | (dest << 21) | ((imm) & 0xffff));
	Write32(instr);
}

// lis dest, imm — encoded as addis dest, 0, imm (loads the high halfword)
void PPCXEmitter::LIS(PPCReg dest, unsigned short imm) {
	u32 instr = (0x3C000000 | (dest << 21) | ((imm) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::LBZ (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0x88000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::LBZX (PPCReg dest, PPCReg a, PPCReg b) {
	u32 instr = ((31<<26) | (dest << 21) | (a << 16) | (b << 11) | (87<<1));
	Write32(instr);
}

void PPCXEmitter::LHZ (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0xA0000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

// Byte-reversed halfword load (used for little-endian MIPS memory).
void PPCXEmitter::LHBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	u32 instr = (0x7C00062C | (dest << 21) | (src << 16) | (offset << 11));
	Write32(instr);
}

void PPCXEmitter::LWZ (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0x80000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

// Byte-reversed word load (used for little-endian MIPS memory).
void PPCXEmitter::LWBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	u32 instr = (0x7C00042C | (dest << 21) | (src << 16) | (offset << 11));
	Write32(instr);
}

void PPCXEmitter::STB (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0x98000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::STBX (PPCReg dest, PPCReg a, PPCReg b) {
	u32 instr = ((31<<26) | (dest << 21) | (a << 16) | (b << 11) | (215 << 1));
	Write32(instr);
}

void PPCXEmitter::STH (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0xB0000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::STHBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	u32 instr = (0x7C00072C | (dest << 21) | (src << 16) | (offset << 11));
	Write32(instr);
}

void PPCXEmitter::STW (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0x90000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

// stwu: store word with update (writes back the effective address to src).
void PPCXEmitter::STWU (PPCReg dest, PPCReg src, int offset) {
	u32 instr = (0x94000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

void PPCXEmitter::STWBRX (PPCReg dest, PPCReg src, PPCReg offset) {
	u32 instr = (0x7C00052C | (dest << 21) | (src << 16) | (offset << 11));
	Write32(instr);
}

// 64-bit doubleword load. NOTE(review): ld is DS-form — the low 2 bits of
// offset are part of the opcode and must be zero; callers must pass
// 4-byte-aligned offsets.
void PPCXEmitter::LD (PPCReg dest, PPCReg src, int offset) {
	u32 instr = ((58 << 26) | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

// 64-bit doubleword store. Same DS-form alignment caveat as LD.
void PPCXEmitter::STD (PPCReg dest, PPCReg src, int offset) {
	u32 instr = ((62 << 26) | (dest << 21) | (src << 16) | ((offset) & 0xffff));
	Write32(instr);
}

// ============================================================================
// Branch operations
// Unconditional branches (b/bl) carry a 26-bit displacement; conditional
// branches (beq/blt/...) only carry a signed 16-bit displacement.
// ============================================================================

void PPCXEmitter::B (const void *fnptr) {
	s32 func = (s32)fnptr - s32(code);
	u32 instr = (0x48000000 | ((s32)((func) & 0x3fffffc)));
	Write32(instr);
}

void PPCXEmitter::BL(const void *fnptr) {
	s32 func = (s32)fnptr - s32(code);
	u32 instr = (0x48000001 | ((s32)((func) & 0x3fffffc)));
	Write32(instr);
}

// Absolute branch (AA bit set): target is the address itself, not relative.
void PPCXEmitter::BA (const void *fnptr) {
	s32 func = (s32)fnptr;
	u32 instr = (0x48000002 | ((s32)((func) & 0x3fffffc)));
	Write32(instr);
}

void PPCXEmitter::BLA (const void *fnptr) {
	s32 func = (s32)fnptr;
	u32 instr = (0x48000003 | ((s32)((func) & 0x3fffffc)));
	Write32(instr);
}

// True when fnptr is reachable from the current code pointer with a
// conditional branch (signed 16-bit displacement).
// FIX(review): the previous version compared an *unsigned* difference against
// -32767 on both sides, making the test true for exactly one distance.
#define IS_SMALL_JUMP ((((s32)fnptr - (s32)code) >= -32768) && (((s32)fnptr - (s32)code) <= 32767))
// Trap in the debugger when the target is OUT of short-branch range.
// FIX(review): the old check was inverted — it broke on valid (in-range)
// targets and silently emitted truncated displacements for far ones.
#define CHECK_SMALL_JUMP { if(!IS_SMALL_JUMP) { DebugBreak(); } }

void PPCXEmitter::BEQ (const void *fnptr) {
	CHECK_SMALL_JUMP

	s32 func = (s32)fnptr - s32(code);
	u32 instr = (0x41820000 | ( func & 0xfffc));
	Write32(instr);
}

// NOTE(review): BGT/BLT/BLE add +1 to the displacement before masking while
// BEQ does not — this looks inconsistent; displacements are multiples of 4,
// so the +1 is masked away by 0xfffc, but verify the intent upstream.
void PPCXEmitter::BGT(const void *fnptr) {
	CHECK_SMALL_JUMP

	s32 func = (s32)fnptr - s32(code);
	u32 instr = (0x41810000 | (((s16)(((func)+1))) & 0xfffc));
	Write32(instr);
}

// bltctr: branch to CTR when CR0[LT] is set (BO=12, BI=0).
void PPCXEmitter::BLTCTR() {
	Write32((19 << 26) | (12 << 21) | (528 <<1));
	// Break();
}

void PPCXEmitter::BLT (const void *fnptr) {
	//CHECK_JUMP
	if (!IS_SMALL_JUMP) {
		// Target too far for a 16-bit displacement: load the address into
		// CTR and branch through it instead.
		u32 func_addr = (u32) fnptr;
		// Load func address
		MOVI2R(R0, func_addr);
		// Set it to count register
		MTCTR(R0);
		// Branch
		BLTCTR();
		return;
	}

	s32 func = (s32)fnptr - s32(code);
	u32 instr = (0x41800000 | (((s16)(((func)+1))) & 0xfffc));
	Write32(instr);
}

void PPCXEmitter::BLE (const void *fnptr) {
	CHECK_SMALL_JUMP

	s32 func = (s32)fnptr - s32(code);
	u32 instr = (0x40810000 | (((s16)(((func)+1))) & 0xfffc));
	Write32(instr);
}

// bctrl: branch to CTR and link (function call through CTR).
void PPCXEmitter::BCTRL() {
	Write32(0x4E800421);
}

void PPCXEmitter::BCTR() {
	Write32(0x4E800420);
}

// ============================================================================
// Link Register / special registers
// ============================================================================

void PPCXEmitter::MFLR(PPCReg r) {
	Write32(0x7C0802A6 | r << 21);
}

void PPCXEmitter::MTLR(PPCReg r) {
	Write32(0x7C0803A6 | r << 21);
}

void PPCXEmitter::MTCTR(PPCReg r) {
	Write32(0x7C0903A6 | r << 21);
}

void PPCXEmitter::BLR() {
	Write32(0x4E800020);
}

void PPCXEmitter::BGTLR() {
	Write32(0x4D810020);
}

// ============================================================================
// Fixup branches: emit a NOP placeholder now, patch the real branch later
// with SetJumpTarget() once the destination is known.
// ============================================================================

FixupBranch PPCXEmitter::B()
{
	FixupBranch branch;
	branch.type = _B;
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(0x60000000);
	return branch;
}

FixupBranch PPCXEmitter::BL()
{
	FixupBranch branch;
	branch.type = _BL;
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(0x60000000);
	return branch;
}

FixupBranch PPCXEmitter::BNE() {
	FixupBranch branch;
	branch.type = _BNE;
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(0x60000000);
	return branch;
}

FixupBranch PPCXEmitter::BLT() {
	FixupBranch branch;
	branch.type = _BLT;
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(0x60000000);
	return branch;
}

FixupBranch PPCXEmitter::BLE() {
	FixupBranch branch;
	branch.type = _BLE;
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(0x60000000);
	return branch;
}

FixupBranch PPCXEmitter::B_Cond(FixupBranchType type) {
	FixupBranch branch;
	branch.type = type;
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(0x60000000);
	return branch;
}

// Patches the placeholder at branch.ptr with a branch to the current code
// pointer. NOTE(review): the range assert is conservative — _B/_BL could
// reach +/-32MB, but +/-32KB is enforced uniformly here.
void PPCXEmitter::SetJumpTarget(FixupBranch const &branch)
{
	s32 distance =  s32(code) - (s32)branch.ptr;
	_assert_msg_(DYNA_REC, distance > -32767
		&& distance <=  32767,
		"SetJumpTarget out of range (%p calls %p)", code,
		branch.ptr);

	switch(branch.type) {
	case _B:
		*(u32*)branch.ptr =  (0x48000000 | ((s32)((distance) & 0x3fffffc)));
		break;
	case _BL:
		*(u32*)branch.ptr =  (0x48000001 | ((s32)((distance) & 0x3fffffc)));
		break;
	case _BEQ:
		*(u32*)branch.ptr =  (0x41820000 | ((s16)(((distance)+1)) & 0xfffc));
		break;
	case _BNE:
		*(u32*)branch.ptr =  (0x40820000 | ((s16)(((distance)+1)) & 0xfffc));
		break;
	case _BLT:
		*(u32*)branch.ptr =  (0x41800000 | ((s16)(((distance)+1)) & 0xfffc));
		break;
	case _BLE:
		*(u32*)branch.ptr =  (0x40810000 | ((s16)(((distance)+1)) & 0xfffc));
		break;
	case _BGT:
		*(u32*)branch.ptr =  (0x41810000 | ((s16)(((distance)+1)) & 0xfffc));
		break;
	case _BGE:
		*(u32*)branch.ptr =  (0x40800000 | ((s16)(((distance)+1)) & 0xfffc));
		break;
	default:
		// Error !!!
		_assert_msg_(DYNA_REC, 0, "SetJumpTarget unknown branch type: %d", branch.type);
		break;
	}
}

// ============================================================================
// Compare (Only use CR0 atm...)
// ============================================================================

void PPCXEmitter::CMPI(PPCReg dest, unsigned short imm) {
	Write32((11<<26) | (dest << 16) | ((imm) & 0xffff));
}

void PPCXEmitter::CMPLI(PPCReg dest, unsigned short imm) {
	Write32((10<<26) | (dest << 16) | ((imm) & 0xffff));
}

void PPCXEmitter::CMP(PPCReg a, PPCReg b) {
	Write32((31 << 26) | (a << 16) | (b << 11));
}

void PPCXEmitter::CMPL(PPCReg a, PPCReg b) {
	Write32((31 << 26) | (a << 16) | (b << 11) | (1<<6));
}

// ============================================================================
// Others operation
// ============================================================================

void PPCXEmitter::ORI(PPCReg src, PPCReg dest, unsigned short imm) {
	u32 instr = (0x60000000 | (src << 21) | (dest << 16) | (imm & 0xffff));
	Write32(instr);
}

void PPCXEmitter::OR(PPCReg Rd, PPCReg Ra, PPCReg Rb) {
	u32 instr = (0x7C000378 | (Ra << 21) | (Rd << 16) | (Rb << 11));
	Write32(instr);
}

void PPCXEmitter::XOR(PPCReg Rd, PPCReg Ra, PPCReg Rb) {
	u32 instr = (0x7C000278 | (Ra << 21) | (Rd << 16) | (Rb << 11));
	Write32(instr);
}

// subf Rd, Ra, Rb computes Rd = Rb - Ra. RCFlags sets the Rc bit (update CR0).
void PPCXEmitter::SUBF(PPCReg Rd, PPCReg Ra, PPCReg Rb, int RCFlags) {
	u32 instr = (0x7C000050 | (Rd << 21) | (Ra << 16) | (Rb << 11) | (RCFlags & 1));
	Write32(instr);
}

// ============================================================================
// Quick Call
// ============================================================================

// Loads an arbitrary 32-bit immediate: one LI when it fits in 16 bits,
// otherwise LIS for the high half plus ORI for the low half.
// dest = LIS(imm) + ORI(+imm)
void PPCXEmitter::MOVI2R(PPCReg dest, unsigned int imm) {
	if (imm == (unsigned short)imm) {
		// 16bit
		LI(dest, imm & 0xFFFF);
	} else {
		// HI 16bit
		LIS(dest, imm>>16);
		if ((imm & 0xFFFF) != 0) {
			// LO 16bit
			ORI(dest, dest, imm & 0xFFFF);
		}
	}
}

// Calls a C function through CTR, clobbering R0 and CTR.
void PPCXEmitter::QuickCallFunction(void *func) {
	/** TODO : can use simple jump **/
	u32 func_addr = (u32) func;
	// Load func address
	MOVI2R(R0, func_addr);
	// Set it to count register
	MTCTR(R0);
	// Branch
	BCTRL();
}

// ============================================================================
// sign extension
// ============================================================================

void PPCXEmitter::EXTSB (PPCReg dest, PPCReg src) {
	Write32((0x7C000774 | (src << 21) | (dest << 16)));
}

void PPCXEmitter::EXTSH (PPCReg dest, PPCReg src) {
	Write32(0x7C000734 | (src << 21) | (dest << 16));
}

// rlwinm dest, src, shift, start, end — rotate left and mask (bit extraction).
void PPCXEmitter::RLWINM (PPCReg dest, PPCReg src, int shift, int start, int end) {
	Write32((21<<26) | (src << 21) | (dest << 16) | (shift << 11) | (start << 6) | (end << 1));
}

// ============================================================================
// Prologue / epilogue
// NOTE(review): registers are saved at negative offsets before the frame is
// allocated — this relies on the ABI red zone below the stack pointer not
// being clobbered; confirm against the target's (Xbox 360) ABI.
// ============================================================================

void PPCXEmitter::Prologue() {
	// Save regs
	u32 regSize = 8; // 4 in 32bit system
	u32 stackFrameSize = 32*32;//(35 - 12) * regSize;

	// Write Prologue (setup stack frame etc ...)
	// Save Lr
	MFLR(R12);

	// Save the non-volatile GPRs r14..r31.
	for(int i = 14; i < 32; i ++) {
		STD((PPCReg)i, R1, -((33 - i) * regSize));
	}

	// Save r12
	STW(R12, R1, -0x8);

	// allocate stack
	STWU(R1, R1, -stackFrameSize);
}

void PPCXEmitter::Epilogue() {
	u32 regSize = 8; // 4 in 32bit system
	u32 stackFrameSize = 32*32;//(35 - 12) * regSize;

	// Write Epilogue (restore stack frame, return)
	// free stack
	ADDI(R1, R1, stackFrameSize);

	// Restore the non-volatile GPRs r14..r31.
	for(int i = 14; i < 32; i ++) {
		LD((PPCReg)i, R1, -((33 - i) * regSize));
	}

	// recover r12 (LR saved register)
	LWZ (R12, R1, -0x8);

	// Restore Lr
	MTLR(R12);
}

// ============================================================================
// Others ... (buffer management)
// ============================================================================

void PPCXEmitter::SetCodePtr(u8 *ptr)
{
	code = ptr;
	startcode = code;
	lastCacheFlushEnd = ptr;
}

const u8 *PPCXEmitter::GetCodePtr() const
{
	return code;
}

u8 *PPCXEmitter::GetWritableCodePtr()
{
	return code;
}

// Fills the requested byte count with NOPs.
void PPCXEmitter::ReserveCodeSpace(u32 bytes)
{
	for (u32 i = 0; i < bytes/4; i++)
		Write32(0x60000000); //nop
}

const u8 *PPCXEmitter::AlignCode16()
{
	ReserveCodeSpace((-(s32)code) & 15);
	return code;
}

const u8 *PPCXEmitter::AlignCodePage()
{
	ReserveCodeSpace((-(s32)code) & 4095);
	return code;
}

// Flushes the instruction cache for everything emitted since the last flush.
void PPCXEmitter::FlushIcache()
{
	FlushIcacheSection(lastCacheFlushEnd, code);
	lastCacheFlushEnd = code;
}

// dcbst/icbi each cache block, then sync+isync (Xbox 360 MSVC inline asm).
void PPCXEmitter::FlushIcacheSection(u8 *start, u8 *end)
{
	u8 * addr = start;
	while(addr < end) {
		__asm dcbst r0, addr
		__asm icbi r0, addr
		addr += 4;
	}
	__emit(0x7c0004ac);//sync
	__emit(0x4C00012C);//isync
}

} // namespace

381
Common/ppcEmitter.h Normal file
View file

@ -0,0 +1,381 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
// http://www.csd.uwo.ca/~mburrel/stuff/ppc-asm.html
// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.aixassem/doc/alangref/linkage_convent.htm
// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.aixassem/doc/alangref/instruction_set.htm
#ifndef _DOLPHIN_PPC_CODEGEN_
#define _DOLPHIN_PPC_CODEGEN_
#include "Common.h"
#include "MemoryUtil.h"
#include <vector>
#undef _IP
#undef R0
#undef _SP
#undef _LR
#undef _PC
#undef CALL
namespace PpcGen
{
	// PowerPC register identifiers used by the emitter's instruction encodings.
	// NOTE(review): the CR/FPR/VR enumerators restart the counter (CR0 == R0 == 0,
	// FPR0 == R1 == 1, ...). The encoders only place these values into bit
	// fields, so this works, but the compiler cannot catch a GPR passed where
	// an FPR was meant — verify this overlap is intentional.
	enum PPCReg
	{
		// GPRs (32)
		// Behaves as zero does in some instructions
		R0 = 0,
		// Stack pointer (SP)
		R1,
		// Reserved
		R2,
		// Used to pass integer function parameters and return values
		R3, R4,
		// Used to pass integer function parameters
		R5, R6, R7, R8, R9, R10,
		// General purpose
		R11,
		// Scratch
		R12,
		// Unused by the compiler reserved
		R13,
		// General purpose
		R14, R15, R16, R17, R18, R19,
		R20, R21, R22, R23, R24, R25,
		R26, R27, R28, R29, R30, R31,

		// CRs (7)
		CR0 = 0,

		// FPRs (32)
		// Scratch
		FPR0,
		// Used to pass double word function parameters and return values
		FPR1, FPR2, FPR3, FPR4,
		FPR5, FPR6, FPR7, FPR8,
		FPR9, FPR10, FPR11, FPR12,
		FPR13,
		// General purpose
		FPR14, FPR15, FPR16, FPR17,
		FPR18, FPR19, FPR20, FPR21,
		FPR22, FPR23, FPR24, FPR25,
		FPR26, FPR27, FPR28, FPR29,
		FPR30, FPR31,

		// Vmx (128)
		VR0,
		// Used to pass vector function parameters and return values
		VR1, VR2, VR3, VR4,
		VR5, VR6, VR7, VR8,
		VR9, VR10, VR11, VR12,
		VR13, // ...

		// Others regs
		LR, CTR, XER, FPSCR,

		// End
		INVALID_REG = 0xFFFFFFFF
	};

	enum IntegerSize
	{
		I_I8 = 0,
		I_I16,
		I_I32,
		I_I64
	};

	enum
	{
		NUMGPRs = 31,
	};

	typedef const u8* JumpTarget;

	// Branch kinds for forward fixups; see PPCXEmitter::SetJumpTarget().
	enum FixupBranchType {
		_B,
		_BEQ,
		_BNE,
		_BLT,
		_BLE,
		_BGT,
		_BGE,
		// Link register
		_BL
	};

	// A pending forward branch: a NOP placeholder at `ptr` that
	// SetJumpTarget() later patches into the real branch instruction.
	struct FixupBranch
	{
		u8 *ptr;
		u32 condition; // Remembers our condition at the time
		FixupBranchType type; //0 = B  1 = BL
	};

	// Emits raw PowerPC machine code at a caller-supplied pointer.
	// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
	class PPCXEmitter
	{
	private:
		u8 *code, *startcode;
		u8 *lastCacheFlushEnd;
		u32 condition;

	protected:
		// Write opcode: append one 32-bit instruction and advance.
		inline void Write32(u32 value) {*(u32*)code = value; code+=4;}

	public:
		PPCXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
		}
		PPCXEmitter(u8 *code_ptr) {
			code = code_ptr;
			lastCacheFlushEnd = code_ptr;
			startcode = code_ptr;
		}
		virtual ~PPCXEmitter() {}

		void SetCodePtr(u8 *ptr);
		void ReserveCodeSpace(u32 bytes);
		const u8 *AlignCode16();
		const u8 *AlignCodePage();
		const u8 *GetCodePtr() const;
		void FlushIcache();
		void FlushIcacheSection(u8 *start, u8 *end);
		u8 *GetWritableCodePtr();

		// Special purpose instructions

		// Debug Breakpoint
		void BKPT(u16 arg);

		// Hint instruction
		void YIELD();

		// Do nothing
		void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)

		// FixupBranch ops: emit a placeholder, patch later with SetJumpTarget().
		FixupBranch B();
		FixupBranch BL();
		FixupBranch BNE();
		FixupBranch BLT();
		FixupBranch BLE();

		FixupBranch B_Cond(FixupBranchType type);

		void SetJumpTarget(FixupBranch const &branch);

		// Branch ops
		void B (const void *fnptr);
		void BL(const void *fnptr);
		void BA (const void *fnptr);
		void BLA(const void *fnptr);
		void BEQ(const void *fnptr);
		void BLE(const void *fnptr);
		void BLT(const void *fnptr);
		void BGT(const void *fnptr);
		void BEQ (PPCReg r);

		void BLR();
		void BGTLR(); // ??? used ?
		void BLTCTR();
		void BGTCTR();
		void BLECTR();
		void BGECTR();
		void BCTRL ();
		void BCTR();

		// Link Register
		void MFLR(PPCReg r);
		void MTLR(PPCReg r);
		void MTCTR(PPCReg r);

		// Logical Ops
		void AND  (PPCReg Rs, PPCReg Ra, PPCReg Rb);
		void ANDI (PPCReg Rs, PPCReg Ra, unsigned short imm);
		void ANDIS(PPCReg Rs, PPCReg Ra, unsigned short imm);
		void NAND (PPCReg Rs, PPCReg Ra, PPCReg Rb);
		void OR   (PPCReg Rs, PPCReg Ra, PPCReg Rb);
		void ORC  (PPCReg Rs, PPCReg Ra, PPCReg Rb);
		void NOR  (PPCReg Rs, PPCReg Ra, PPCReg Rb);
		void XOR  (PPCReg Rs, PPCReg Ra, PPCReg Rb);
		void NEG  (PPCReg Rs, PPCReg Ra, PPCReg Rb);

		// Arithmetics ops
		void ADD   (PPCReg Rd, PPCReg Ra, PPCReg Rb);
		void ADDI  (PPCReg Rd, PPCReg Ra, short imm);
		void ADDIS (PPCReg Rd, PPCReg Ra, short imm);
		void ADDC  (PPCReg Rd, PPCReg Ra, PPCReg Rb);
		// SUB is implemented via subf with the operands reversed.
		void SUB   (PPCReg Rd, PPCReg Ra, PPCReg Rb) {
			// reverse ?
			SUBF(Rd, Rb, Ra);
		}
		// if RCFlags update CR0
		void SUBF  (PPCReg Rd, PPCReg Ra, PPCReg Rb, int RCFlags = 0);
		void SUBFC (PPCReg Rd, PPCReg Ra, PPCReg Rb);

		// Floating ops
		void DIVW  (PPCReg dest, PPCReg dividend, PPCReg divisor);
		void DIVWU (PPCReg dest, PPCReg dividend, PPCReg divisor);
		void MULLW (PPCReg dest, PPCReg src, PPCReg op2);
		void MULHW (PPCReg dest, PPCReg src, PPCReg op2);
		void MULHWS(PPCReg dest, PPCReg src, PPCReg op2);

		void ORI   (PPCReg src, PPCReg dest, unsigned short imm);

		// Memory load/store operations
		void LI    (PPCReg dest, unsigned short imm);
		void LIS   (PPCReg dest, unsigned short imm);
		// dest = LIS(imm) + ORI(+imm)
		void MOVI2R(PPCReg dest, unsigned int imm);

		// 8bit
		void LBZ  (PPCReg dest, PPCReg src, int offset = 0);
		void LBZX (PPCReg dest, PPCReg a, PPCReg b);
		// 16bit
		void LHZ  (PPCReg dest, PPCReg src, int offset = 0);
		void LHBRX(PPCReg dest, PPCReg src, PPCReg offset);
		// 32 bit
		void LWZ  (PPCReg dest, PPCReg src, int offset = 0);
		void LWBRX(PPCReg dest, PPCReg src, PPCReg offset);
		// 64 bit
		void LD   (PPCReg dest, PPCReg src, int offset = 0);

		// 8 bit
		void STB   (PPCReg dest, PPCReg src, int offset = 0);
		void STBX  (PPCReg dest, PPCReg a, PPCReg b);
		// 16 bit
		void STH   (PPCReg dest, PPCReg src, int offset = 0);
		void STHBRX(PPCReg dest, PPCReg src, PPCReg offset);
		// 32 bit
		void STW   (PPCReg dest, PPCReg src, int offset = 0);
		void STWU  (PPCReg dest, PPCReg src, int offset = 0);
		void STWBRX(PPCReg dest, PPCReg src, PPCReg offset);
		// 64 bit
		void STD   (PPCReg dest, PPCReg src, int offset = 0);

		// sign
		void EXTSB (PPCReg dest, PPCReg src);
		void EXTSH (PPCReg dest, PPCReg src);

		void RLWINM(PPCReg dest, PPCReg src, int shift, int start, int end);

		// Compare
		void CMPLI (PPCReg dest, unsigned short imm);
		void CMPI  (PPCReg dest, unsigned short imm);
		void CMPL  (PPCReg a, PPCReg b);
		void CMP   (PPCReg a, PPCReg b);

		void Prologue();
		void Epilogue();

		// Debug !
		void Break() {
			Write32(0x0FE00016);
		}

		// mr to, from — encoded as or to, from, from.
		void MR(PPCReg to, PPCReg from) {
			OR(to, from, from);
		}

		void QuickCallFunction(void *func);

	protected:

	};  // class PPCXEmitter

	// You get memory management for free, plus, you can use all the MOV etc functions without
	// having to prefix them with gen-> or something similar.
	// Owns an executable memory region and emits into it.
	class PPCXCodeBlock : public PPCXEmitter
	{
	protected:
		u8 *region;
		size_t region_size;

	public:
		PPCXCodeBlock() : region(NULL), region_size(0) {}
		virtual ~PPCXCodeBlock() { if (region) FreeCodeSpace(); }

		// Call this before you generate any code.
		void AllocCodeSpace(int size)
		{
			region_size = size;
			region = (u8*)AllocateExecutableMemory(region_size);
			SetCodePtr(region);
		}

		// Always clear code space with breakpoints, so that if someone accidentally executes
		// uninitialized, it just breaks into the debugger.
		// NOTE(review): 0xCC is the x86 int3 byte — it is not a trap
		// instruction on PowerPC; confirm a suitable fill pattern for PPC.
		void ClearCodeSpace()
		{
			// x86/64: 0xCC = breakpoint
			memset(region, 0xCC, region_size);
			ResetCodePtr();
		}

		// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
		// FIX(review): previously this only dropped the pointer and leaked the
		// executable region; release it with the allocator's counterpart.
		void FreeCodeSpace()
		{
			if (region)
				FreeMemoryPages(region, region_size);
			region = NULL;
			region_size = 0;
		}

		bool IsInSpace(u8 *ptr)
		{
			return ptr >= region && ptr < region + region_size;
		}

		// Cannot currently be undone. Will write protect the entire code region.
		// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
		void WriteProtect()
		{
			//WriteProtectMemory(region, region_size, true);
		}
		void UnWriteProtect()
		{
			//UnWriteProtectMemory(region, region_size, false);
		}

		void ResetCodePtr()
		{
			SetCodePtr(region);
		}

		size_t GetSpaceLeft() const
		{
			return region_size - (GetCodePtr() - region);
		}

		u8 *GetBasePtr() {
			return region;
		}

		size_t GetOffset(u8 *ptr) {
			return ptr - region;
		}
	};

}  // namespace
#endif // _DOLPHIN_PPC_CODEGEN_

View file

@ -95,17 +95,19 @@ void Core_WaitInactive(int milliseconds)
// Recomputes the dp (device-independent) and raw pixel resolutions from the
// core's window size, applying the Windows zoom factor when configured.
void UpdateScreenScale() {
	dp_xres = PSP_CoreParameter().pixelWidth;
	dp_yres = PSP_CoreParameter().pixelHeight;
	pixel_xres = PSP_CoreParameter().pixelWidth;
	pixel_yres = PSP_CoreParameter().pixelHeight;
	g_dpi = 72;
	g_dpi_scale = 1.0f;
#ifdef _WIN32
	if (g_Config.iWindowZoom == 1)
	{
		dp_xres *= 2;
		dp_yres *= 2;
		g_dpi_scale = 2.0f;
	}
	else
#endif
	// NOTE(review): `else` binds only to the single statement below, so the
	// remaining assignments always run and reset g_dpi_scale to 1.0f, undoing
	// the 2.0f set in the zoom branch. The same initialization already runs
	// at the top of the function — this looks like a bad merge; verify
	// against the upstream version of this function.
	pixel_xres = PSP_CoreParameter().pixelWidth;
	pixel_yres = PSP_CoreParameter().pixelHeight;
	g_dpi = 72;
	g_dpi_scale = 1.0f;
	pixel_in_dps = (float)pixel_xres / dp_xres;
}

View file

@ -341,9 +341,10 @@
<ClCompile Include="Util\BlockAllocator.cpp" />
<ClCompile Include="Util\PPGeDraw.cpp" />
<ClCompile Include="Util\ppge_atlas.cpp" />
<ClCompile Include="..\ext\xxhash.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Ext\disarm.h" />
<ClInclude Include="..\ext\disarm.h" />
<ClInclude Include="..\ext\snappy\snappy-internal.h" />
<ClInclude Include="..\ext\snappy\snappy-sinksource.h" />
<ClInclude Include="..\ext\snappy\snappy-stubs-internal.h" />
@ -494,6 +495,7 @@
<ClInclude Include="Util\BlockAllocator.h" />
<ClInclude Include="Util\PPGeDraw.h" />
<ClInclude Include="Util\ppge_atlas.h" />
<ClInclude Include="..\ext\xxhash.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\android\jni\Android.mk" />

View file

@ -457,6 +457,9 @@
<ClCompile Include="MIPS\MIPSStackWalk.cpp">
<Filter>MIPS</Filter>
</ClCompile>
<ClCompile Include="..\ext\xxhash.c">
<Filter>Ext</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ELF\ElfReader.h">
@ -777,9 +780,6 @@
<ClInclude Include="MIPS\ARM\ArmRegCache.h">
<Filter>MIPS\ARM</Filter>
</ClInclude>
<ClInclude Include="..\Ext\disarm.h">
<Filter>Ext</Filter>
</ClInclude>
<ClInclude Include="MIPS\x86\RegCacheFPU.h">
<Filter>MIPS\x86</Filter>
</ClInclude>
@ -852,6 +852,12 @@
<ClInclude Include="MIPS\MIPSStackWalk.h">
<Filter>MIPS</Filter>
</ClInclude>
<ClInclude Include="..\ext\disarm.h">
<Filter>Ext</Filter>
</ClInclude>
<ClInclude Include="..\ext\xxhash.h">
<Filter>Ext</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />

View file

@ -29,6 +29,7 @@ enum GPUCore {
GPU_NULL,
GPU_GLES,
GPU_SOFTWARE,
GPU_DIRECTX9,
};
struct CoreParameter

View file

@ -48,6 +48,10 @@ using namespace ArmGen;
#include "Common/x64Analyzer.h"
#include "Core/MIPS/x86/Asm.h"
using namespace Gen;
#elif defined(PPC)
#include "Common/ppcEmitter.h"
#include "Core/MIPS/MIPS.h"
using namespace PpcGen;
#else
#error "Unsupported arch!"
#endif
@ -274,6 +278,10 @@ void JitBlockCache::LinkBlockExits(int i)
#elif defined(_M_IX86) || defined(_M_X64)
XEmitter emit(b.exitPtrs[e]);
emit.JMP(blocks[destinationBlock].checkedEntry, true);
#elif defined(PPC)
PPCXEmitter emit(b.exitPtrs[e]);
emit.B(blocks[destinationBlock].checkedEntry);
emit.FlushIcache();
#endif
b.linkStatus[e] = true;
}
@ -356,6 +364,12 @@ void JitBlockCache::DestroyBlock(int block_num, bool invalidate)
XEmitter emit((u8 *)b.checkedEntry);
emit.MOV(32, M(&mips->pc), Imm32(b.originalAddress));
emit.JMP(MIPSComp::jit->Asm().dispatcher, true);
#elif defined(PPC)
PPCXEmitter emit((u8 *)b.checkedEntry);
emit.MOVI2R(R3, b.originalAddress);
emit.STW(R0, CTXREG, offsetof(MIPSState, pc));
emit.B(MIPSComp::jit->dispatcher);
emit.FlushIcache();
#endif
}

View file

@ -36,6 +36,11 @@ typedef ArmGen::ARMXCodeBlock CodeBlock;
namespace Gen { class XEmitter; }
using namespace Gen;
typedef Gen::XCodeBlock CodeBlock;
#elif defined(PPC)
#include "Common/ppcEmitter.h"
namespace PpcGen { class PPCXEmitter; }
using namespace PpcGen;
typedef PpcGen::PPCXCodeBlock CodeBlock;
#else
#error "Unsupported arch!"
#endif

View file

@ -19,7 +19,9 @@
#include "Common/Common.h"
#if defined(ARM)
#if defined(PPC)
#include "../PPC/PpcJit.h"
#elif defined(ARM)
#include "../ARM/ArmJit.h"
#else
#include "../x86/Jit.h"

285
Core/MIPS/PPC/PpcAsm.cpp Normal file
View file

@ -0,0 +1,285 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
// Trampoline invoked from generated code when the dispatcher finds no
// compiled block for the current PC: compiles the block at mips->pc.
static void JitAt()
{
	MIPSComp::jit->Compile(currentMIPS->pc);
}
namespace MIPSComp
{

// Countdown gates for the (currently disabled) block-disassembly logging below.
static int dontLogBlocks = 20;
static int logBlocks = 40;

// Compiles one MIPS basic block starting at em_address into PPC code.
// Emits a checked entry (downcount test) followed by the block body, then
// flushes the instruction cache. Returns the block's normal entry point.
const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
	js.cancel = false;
	js.blockStart = js.compilerPC = mips_->pc;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.PrefixStart();

	// We add a check before the block, used when entering from a linked block.
	b->checkedEntry = GetCodePtr();
	// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
	MOVI2R(SREG, js.blockStart);
	// if (currentMIPS->downcount<0)
	CMPI(DCNTREG, 0);
	BLT((const void *)outerLoopPCInR0);

	b->normalEntry = GetCodePtr();

	// TODO: this needs work
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	//fpr.Start(analysis);

	int numInstructions = 0;
	// NOTE(review): cycles and partialFlushOffset are never used in this
	// function — likely left over from the ARM jit this was based on.
	int cycles = 0;
	int partialFlushOffset = 0;
	if (logBlocks > 0) logBlocks--;
	if (dontLogBlocks > 0) dontLogBlocks--;

// #define LOGASM
#ifdef LOGASM
	char temp[256];
#endif
	// Compile instruction by instruction until a branch/exception ends the block.
	while (js.compiling)
	{
		gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
		//fpr.SetCompilerPC(js.compilerPC);
		u32 inst = Memory::Read_Instruction(js.compilerPC);
		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst);

		js.compilerPC += 4;
		numInstructions++;
	}
	//FlushLitPool();
#ifdef LOGASM
	if (logBlocks > 0 && dontLogBlocks == 0) {
		for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true);
			INFO_LOG(DYNA_REC, "M: %08x   %s", cpc, temp);
		}
	}
#endif

	b->codeSize = GetCodePtr() - b->normalEntry;

#ifdef LOGASM
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(DYNA_REC, "=============== ARM ===============");
		DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
#endif
	//DumpJit();

	AlignCode16();

	// Don't forget to zap the instruction cache!
	FlushIcache();

	b->originalSize = numInstructions;
	return b->normalEntry;
}

// Dumps the whole generated-code region to disk for offline inspection
// (Xbox 360 builds only).
void Jit::DumpJit() {
#ifdef _XBOX
	u32 len = (u32)GetCodePtr() - (u32)GetBasePtr();
	FILE * fd;
	fd = fopen("game:\\jit.bin", "wb");
	fwrite(GetBasePtr(), len, 1, fd);
	fclose(fd);
#endif
}

// Emits the fixed dispatcher/outer-loop code that all compiled blocks jump
// through: sets up the fixed registers (BASEREG/CTXREG/CODEREG/DCNTREG),
// then loops looking up compiled blocks by the EMUHACK marker in memory,
// compiling via JitAt() on a miss, until coreState leaves CORE_RUNNING.
void Jit::GenerateFixedCode() {
	enterCode = AlignCode16();

	INFO_LOG(HLE, "Base: %08x", (u32)Memory::base);
	INFO_LOG(HLE, "enterCode: 0x%08p", enterCode);
	INFO_LOG(HLE, "GetBasePtr: 0x%08p", GetBasePtr());

	Prologue();

	// Map fixed register
	MOVI2R(BASEREG, (u32)Memory::base);
	MOVI2R(CTXREG, (u32)mips_);
	MOVI2R(CODEREG, (u32)GetBasePtr());

	// Update downcount reg value from memory
	RestoreDowncount(DCNTREG);

	// SREG = mips->pc
	MovFromPC(SREG);

	// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInR3 ??
	outerLoopPCInR0 = GetCodePtr();

	// mips->pc = SREG
	MovToPC(SREG);

	// Keep current location
	outerLoop = GetCodePtr();

	// Jit loop
	// {
	// Save downcount reg value to memory
	SaveDowncount(DCNTREG);
	// Call CoreTiming::Advance() => update donwcount
	QuickCallFunction((void *)&CoreTiming::Advance);
	// Update downcount reg value from memory
	RestoreDowncount(DCNTREG);

	// branch to skipToRealDispatch
	FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time

	// Keep current location dispatcherCheckCoreState:
	dispatcherCheckCoreState = GetCodePtr();

	// The result of slice decrementation should be in flags if somebody jumped here
	// IMPORTANT - We jump on negative, not carry!!!
	// branch to bailCoreState: (jump if(what ??) negative )
	FixupBranch bailCoreState = BLT(); // BLT ???

	// SREG = coreState
	MOVI2R(SREG, (u32)&coreState);
	// Compare coreState and CORE_RUNNING
	LWZ(SREG, SREG); // SREG = *SREG
	CMPI(SREG, 0); // compare 0(CORE_RUNNING) and CR0

	// branch to badCoreState: (jump if coreState != CORE_RUNNING)
	FixupBranch badCoreState = BNE();

	// branch to skipToRealDispatch2:
	FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time

	// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInSREG ??
	dispatcherPCInR0 = GetCodePtr();

	// mips->pc = SREG
	MovToPC(SREG);

	// At this point : flags = EQ. Fine for the next check, no need to jump over it.
	// label dispatcher:
	dispatcher = GetCodePtr();

	// {
	// The result of slice decrementation should be in flags if somebody jumped here
	// IMPORTANT - We jump on negative, not carry!!!
	// label bail:
	// arm B_CC(CC_MI);
	FixupBranch bail = BLT();

	// label skipToRealDispatch:
	SetJumpTarget(skipToRealDispatch);

	// label skipToRealDispatch2:
	SetJumpTarget(skipToRealDispatch2);

	// Keep current location
	dispatcherNoCheck = GetCodePtr();

	// read op
	// R3 = mips->pc & Memory::MEMVIEW32_MASK
	LWZ(R3, CTXREG, offsetof(MIPSState, pc));
	// & Memory::MEMVIEW32_MASK
	RLWINM(R3, R3, 0, 2, 31);

	// R3 = memory::base[r3];
	ADD(R3, BASEREG, R3);
	MOVI2R(R0, 0);
	LWBRX(R3, R3, R0);

	// R4 = R3 & MIPS_EMUHACK_VALUE_MASK
	RLWINM(R4, R3, 0, 6, 31);

	// R3 = R3 & MIPS_EMUHACK_MASK
	RLWINM(R3, R3, 0, 0, 6);

	// compare, op == MIPS_EMUHACK_OPCODE
	MOVI2R(SREG, MIPS_EMUHACK_OPCODE);
	CMPL(R3, SREG);

	// Branch if func block not found
	FixupBranch notfound = BNE();

	// {
	// R3 = R4 + GetBasePtr()
	ADD(R3, R4, CODEREG);
	MTCTR(R3);
	BCTR();
	// }

	// label notfound:
	SetJumpTarget(notfound);

	//Ok, no block, let's jit
	// Save downcount reg value to memory
	SaveDowncount(DCNTREG);
	// Exec JitAt => Compile block !
	QuickCallFunction((void *)&JitAt);
	// Update downcount reg value from memory
	RestoreDowncount(DCNTREG);

	// branch to dispatcherNoCheck:
	B(dispatcherNoCheck); // no point in special casing this
	// }

	// label bail:
	SetJumpTarget(bail);
	// label bailCoreState:
	SetJumpTarget(bailCoreState);

	// Compare coreState and CORE_RUNNING
	MOVI2R(SREG, (u32)&coreState);
	LWZ(SREG, SREG); // SREG = *SREG => SREG = coreState
	CMPLI(SREG, 0); // compare 0(CORE_RUNNING) and corestate

	BEQ(outerLoop);
	// }

	// badCoreState label:
	SetJumpTarget(badCoreState);

	// Keep current location
	breakpointBailout = GetCodePtr();

	// mips->downcount = DCNTREG
	SaveDowncount(DCNTREG);

	Epilogue();

	// Go back to caller
	BLR();

	// Don't forget to zap the instruction cache!
	FlushIcache();
}

}

View file

@ -0,0 +1,172 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
using namespace MIPSAnalyst;
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _SA ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
// Compile-time constant-folding helpers, passed to CompType3 so that ops on
// two known-immediate registers can be evaluated without emitting code.
static u32 EvalOr(u32 a, u32 b) { return a | b; }
static u32 EvalEor(u32 a, u32 b) { return a ^ b; }
static u32 EvalAnd(u32 a, u32 b) { return a & b; }
static u32 EvalAdd(u32 a, u32 b) { return a + b; }
static u32 EvalSub(u32 a, u32 b) { return a - b; }
// Compiles MIPS immediate-type ALU ops (addi/addiu/lui). Anything else in
// this encoding group falls back to the interpreter via Comp_Generic.
// Fix: removed the unused local `u32 suimm = (u32)(s32)simm;` — it was never
// read by any case below.
void Jit::Comp_IType(u32 op)
{
	CONDITIONAL_DISABLE;
	s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension
	u32 uimm = op & 0xFFFF;

	int rt = _RT;
	int rs = _RS;

	// noop, won't write to ZERO.
	if (rt == 0)
		return;

	switch (op >> 26)
	{
	case 8:	// same as addiu? (addi traps on overflow on real hardware; treated identically here)
	case 9:	// R(rt) = R(rs) + simm; break; //addiu
		{
			if (gpr.IsImm(rs)) {
				// Known immediate: fold at compile time, no code emitted.
				gpr.SetImm(rt, gpr.GetImm(rs) + simm);
			} else {
				gpr.MapDirtyIn(rt, rs);
				ADDI(gpr.R(rt), gpr.R(rs), simm);
			}
			break;
		}

	case 15: // R(rt) = uimm << 16; //lui
		gpr.SetImm(rt, uimm << 16);
		break;

	default:
		Comp_Generic(op);
		break;
	}
}
// Not implemented in the PPC JIT yet; falls back to the interpreter.
void Jit::Comp_RType2(u32 op) {
	Comp_Generic(op);
}
// Utilities to reduce duplicated code
// Placeholder for immediate-operand logic ops — not implemented yet.
// Deliberately traps into the debugger if ever reached.
void Jit::CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b)) {
	DebugBreak();
}
// Compiles a three-register ALU op: rd = rs <arith> rt. When both sources are
// known immediates the result is constant-folded via 'eval'; when one is, it
// is materialized into SREG on the correct side of the (possibly
// non-commutative) operation. NOTE(review): 'isSub' is currently unused in
// this body — presumably reserved for a subtract-specific immediate path;
// confirm before relying on it.
void Jit::CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub) {
	if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
		// Both operands known: no code emitted, just track the result.
		gpr.SetImm(rd, (*eval)(gpr.GetImm(rs), gpr.GetImm(rt)));
	} else if (gpr.IsImm(rt)) {
		// Read the immediate BEFORE mapping — mapping rd may evict rt's value.
		u32 rtImm = gpr.GetImm(rt);
		gpr.MapDirtyIn(rd, rs);
		MOVI2R(SREG, rtImm);
		(this->*arith)(gpr.R(rd), gpr.R(rs), SREG);
	} else if (gpr.IsImm(rs)) {
		u32 rsImm = gpr.GetImm(rs);
		gpr.MapDirtyIn(rd, rt);
		// TODO: Special case when rsImm can be represented as an Operand2
		MOVI2R(SREG, rsImm);
		(this->*arith)(gpr.R(rd), SREG, gpr.R(rt));
	} else {
		// Generic solution
		gpr.MapDirtyInIn(rd, rs, rt);
		(this->*arith)(gpr.R(rd), gpr.R(rs), gpr.R(rt));
	}
}
// Compiles MIPS three-register ALU ops (add/addu/sub/subu/and/or/xor).
// Unhandled function codes fall back to the interpreter.
void Jit::Comp_RType3(u32 op) {
	CONDITIONAL_DISABLE;
	int rt = _RT;
	int rs = _RS;
	int rd = _RD;

	// noop, won't write to ZERO.
	if (rd == 0)
		return;

	switch (op & 63)
	{
	case 32: //R(rd) = R(rs) + R(rt); break; //add
	case 33: //R(rd) = R(rs) + R(rt); break; //addu
		// Some optimized special cases: adding a known zero is just a move.
		if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) {
			gpr.MapDirtyIn(rd, rt);
			MR(gpr.R(rd), gpr.R(rt));
		} else if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {
			gpr.MapDirtyIn(rd, rs);
			MR(gpr.R(rd), gpr.R(rs));
		} else {
			CompType3(rd, rs, rt, &PPCXEmitter::ADD, &EvalAdd);
		}
		break;
	case 34: //R(rd) = R(rs) - R(rt); break; //sub
	case 35: //R(rd) = R(rs) - R(rt); break; //subu
		CompType3(rd, rs, rt, &PPCXEmitter::SUB, &EvalSub, true);
		break;
	case 36: //R(rd) = R(rs) & R(rt); break; //and
		CompType3(rd, rs, rt, &PPCXEmitter::AND, &EvalAnd);
		break;
	case 37: //R(rd) = R(rs) | R(rt); break; //or
		CompType3(rd, rs, rt, &PPCXEmitter::OR, &EvalOr);
		break;
	case 38: //R(rd) = R(rs) ^ R(rt); break; //xor/eor
		CompType3(rd, rs, rt, &PPCXEmitter::XOR, &EvalEor);
		break;
	default:
		Comp_Generic(op);
		break;
	}
}
// Shifts (sll/srl/sra/...) — not implemented yet, interpreter fallback.
void Jit::Comp_ShiftType(u32 op) {
	Comp_Generic(op);
}
// Allegrex-specific ops — not implemented yet, interpreter fallback.
void Jit::Comp_Allegrex(u32 op) {
	Comp_Generic(op);
}
// More Allegrex-specific ops — not implemented yet, interpreter fallback.
void Jit::Comp_Allegrex2(u32 op) {
	Comp_Generic(op);
}
// Multiply/divide (HI/LO) ops — not implemented yet, interpreter fallback.
void Jit::Comp_MulDivType(u32 op) {
	Comp_Generic(op);
}
// SPECIAL3 ops (ext/ins/...) — not implemented yet, interpreter fallback.
void Jit::Comp_Special3(u32 op) {
	Comp_Generic(op);
}
}

View file

@ -0,0 +1,434 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/Reporting.h"
#include "Core/HLE/HLE.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
#define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT ;
#define SHOW_JS_COMPILER_PC { printf("js.compilerPC: %08x\n", js.compilerPC); }
#define BRANCH_COMPILE_LOG { printf("JIT(%8x): %s => %d - %08x\n", (u32)GetCodePtr() ,__FUNCTION__, cc, js.compilerPC); }
using namespace MIPSAnalyst;
using namespace PpcGen;
namespace MIPSComp
{
// Compiles a MIPS two-register compare-and-branch (beq/bne and the "likely"
// variants). 'cc' is the PPC condition that SKIPS the branch (inverted sense,
// see Comp_RelBranch). Emits both exits: taken (exit 0) and fall-through
// (exit 1).
void Jit::BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}
	int offset = (signed short)(op&0xFFFF)<<2; // signed 16-bit word displacement
	int rt = _RT;
	int rs = _RS;
	u32 targetAddr = js.compilerPC + offset + 4;

	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);

	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
	CONDITIONAL_NICE_DELAYSLOT;
	// A "nice" delay slot doesn't interfere with rs/rt, so it can be compiled
	// before the compare.
	if (!likely && delaySlotIsNice)
		CompileDelaySlot(DELAYSLOT_NICE);

	// Comparing against a known zero immediate avoids loading one operand.
	if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
	{
		gpr.MapReg(rs);
		CMPLI(gpr.R(rs), 0);
	}
	else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) // only these are easily 'flippable'
	{
		gpr.MapReg(rt);
		CMPLI(gpr.R(rt), 0);
	}
	else
	{
		gpr.MapInIn(rs, rt);
		CMPL(gpr.R(rs), gpr.R(rt));
	}

	PpcGen::FixupBranch ptr;
	if (!likely)
	{
		if (!delaySlotIsNice)
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); // preserves the compare result across the slot
		else
			FlushAll();
		ptr = B_Cond(cc);
	}
	else
	{
		// Likely branch: the delay slot only executes when the branch is taken.
		FlushAll();
		ptr = B_Cond(cc);
		CompileDelaySlot(DELAYSLOT_FLUSH);
	}

	INFO_LOG(CPU, "targetAddr: %08x,js.compilerPC: %08x offset: %08x, op: %08x\n", targetAddr, js.compilerPC, offset, op);

	// Take the branch
	WriteExit(targetAddr, 0);

	SetJumpTarget(ptr);
	// Not taken
	WriteExit(js.compilerPC+8, 1);

	js.compiling = false;
}
// Compiles a MIPS compare-rs-against-zero branch (blez/bgtz/bltz/bgez and
// friends). 'cc' is the inverted (skip) condition; 'andLink' writes the
// return address to RA (the ...al variants); 'likely' controls delay-slot
// semantics.
void Jit::BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}
	int offset = (signed short)(op&0xFFFF)<<2;
	int rs = _RS;
	u32 targetAddr = js.compilerPC + offset + 4;

	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);

	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
	CONDITIONAL_NICE_DELAYSLOT;
	if (!likely && delaySlotIsNice)
		CompileDelaySlot(DELAYSLOT_NICE);

	// Signed compare against zero (these branches are signed in MIPS).
	gpr.MapReg(rs);
	CMPI(gpr.R(rs), 0);

	PpcGen::FixupBranch ptr;
	if (!likely)
	{
		if (!delaySlotIsNice)
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		else
			FlushAll();
		ptr = B_Cond(cc);
	}
	else
	{
		FlushAll();
		ptr = B_Cond(cc);
		CompileDelaySlot(DELAYSLOT_FLUSH);
	}

	// Take the branch
	if (andLink)
	{
		//Break();
		// Store the link address straight into the MIPS context's RA slot.
		MOVI2R(SREG, js.compilerPC + 8);
		STW(SREG, CTXREG, MIPS_REG_RA * 4);
	}

	WriteExit(targetAddr, 0);

	SetJumpTarget(ptr);
	// Not taken
	WriteExit(js.compilerPC + 8, 1);
	js.compiling = false;
}
// Dispatches MIPS relative branches (beq/bne/blez/bgtz and likely variants).
void Jit::Comp_RelBranch(u32 op) {
	// The CC flags here should be opposite of the actual branch becuase they skip the branching action.
	switch (op>>26)
	{
	case 4: BranchRSRTComp(op, _BNE, false); break;//beq
	case 5: BranchRSRTComp(op, _BEQ, false); break;//bne

	case 6: BranchRSZeroComp(op, _BGT, false, false); break;//blez
	case 7: BranchRSZeroComp(op, _BLE, false, false); break;//bgtz

	case 20: BranchRSRTComp(op, _BNE, true); break;//beql
	case 21: BranchRSRTComp(op, _BEQ, true); break;//bnel

	case 22: BranchRSZeroComp(op, _BGT, false, true); break;//blezl
	case 23: BranchRSZeroComp(op, _BLE, false, true); break;//bgtzl

	default:
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
		break;
	}
	js.compiling = false;
}
// Dispatches REGIMM branches (bltz/bgez families, including the and-link and
// likely variants). As above, the condition passed is the skip condition.
void Jit::Comp_RelBranchRI(u32 op) {
	switch ((op >> 16) & 0x1F)
	{
	case 0: BranchRSZeroComp(op, _BGE, false, false); break; //if ((s32)R(rs) < 0)  DelayBranchTo(addr); else PC += 4; break;//bltz
	case 1: BranchRSZeroComp(op, _BLT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
	case 2: BranchRSZeroComp(op, _BGE, false, true);  break; //if ((s32)R(rs) < 0)  DelayBranchTo(addr); else PC += 8; break;//bltzl
	case 3: BranchRSZeroComp(op, _BLT, false, true);  break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
	case 16: BranchRSZeroComp(op, _BGE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0)  DelayBranchTo(addr); else PC += 4; break;//bltzal
	case 17: BranchRSZeroComp(op, _BLT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
	case 18: BranchRSZeroComp(op, _BGE, true, true);  break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0)  DelayBranchTo(addr); else SkipLikely(); break;//bltzall
	case 19: BranchRSZeroComp(op, _BLT, true, true);  break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
	default:
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
		break;
	}
	js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
// If likely is set, discard the branch slot if NOT taken.
// Compiles a branch on the FPU condition bit (bc1f/bc1t families): loads
// mips->fpcond, isolates bit 0 with a flag-setting ANDI, then branches on
// the inverted condition 'cc'.
void Jit::BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}
	int offset = (signed short)(op & 0xFFFF) << 2;
	u32 targetAddr = js.compilerPC + offset + 4;

	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);

	bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
	CONDITIONAL_NICE_DELAYSLOT;
	if (!likely && delaySlotIsNice)
		CompileDelaySlot(DELAYSLOT_NICE);

	FlushAll();

	LWZ(SREG, CTXREG, offsetof(MIPSState, fpcond));
	// change CR0
	ANDI(SREG, SREG, 1);

	PpcGen::FixupBranch ptr;
	if (!likely)
	{
		if (!delaySlotIsNice)
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		ptr = B_Cond(cc);
	}
	else
	{
		ptr = B_Cond(cc);
		CompileDelaySlot(DELAYSLOT_FLUSH);
	}

	// Take the branch
	WriteExit(targetAddr, 0);

	SetJumpTarget(ptr);
	// Not taken
	WriteExit(js.compilerPC + 8, 1);
	js.compiling = false;
}
// Dispatches FPU condition-flag branches (bc1f/bc1t/bc1fl/bc1tl).
void Jit::Comp_FPUBranch(u32 op) {
	switch((op >> 16) & 0x1f)
	{
	case 0:	BranchFPFlag(op, _BNE, false); break; // bc1f
	case 1: BranchFPFlag(op, _BEQ, false); break; // bc1t
	case 2: BranchFPFlag(op, _BNE, true);  break; // bc1fl
	case 3: BranchFPFlag(op, _BEQ, true);  break; // bc1tl
	default:
		_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
		break;
	}
	js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
// If likely is set, discard the branch slot if NOT taken.
// Compiles a branch on one VFPU condition-code bit (bvf/bvt families): loads
// vfpuCtrl[VFPU_CTRL_CC], tests bit imm3 with a flag-setting ANDI, branches
// on the inverted condition.
void Jit::BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}
	int offset = (signed short)(op & 0xFFFF) << 2;
	u32 targetAddr = js.compilerPC + offset + 4;

	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);

	bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
	CONDITIONAL_NICE_DELAYSLOT;
	if (!likely && delaySlotIsNice)
		CompileDelaySlot(DELAYSLOT_NICE);

	FlushAll();

	int imm3 = (op >> 18) & 7;

	MOVI2R(SREG, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
	LWZ(SREG, SREG, 0);
	// change CR0
	ANDI(SREG, SREG, 1 << imm3);

	PpcGen::FixupBranch ptr;
	// NOTE(review): inDelaySlot is forced true around the whole branch tail
	// here (unlike the other Branch* helpers) — presumably so nested compiles
	// see it; confirm this is intentional.
	js.inDelaySlot = true;
	if (!likely)
	{
		if (!delaySlotIsNice)
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		ptr = B_Cond(cc);
	}
	else
	{
		ptr = B_Cond(cc);
		CompileDelaySlot(DELAYSLOT_FLUSH);
	}
	js.inDelaySlot = false;

	// Take the branch
	WriteExit(targetAddr, 0);

	SetJumpTarget(ptr);
	// Not taken
	WriteExit(js.compilerPC + 8, 1);
	js.compiling = false;
}
// Dispatches VFPU condition branches (bvf/bvt/bvfl/bvtl). The 2-bit field
// covers all four cases, so no default is needed.
void Jit::Comp_VBranch(u32 op) {
	switch ((op >> 16) & 3)
	{
	case 0: BranchVFPUFlag(op, _BNE, false); break; // bvf
	case 1: BranchVFPUFlag(op, _BEQ, false); break; // bvt
	case 2: BranchVFPUFlag(op, _BNE, true);  break; // bvfl
	case 3: BranchVFPUFlag(op, _BEQ, true);  break; // bvtl
	}
	js.compiling = false;
}
// Compiles absolute jumps j/jal. The 26-bit target is combined with the top
// four bits of the current PC per the MIPS jump encoding.
void Jit::Comp_Jump(u32 op) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}
	u32 off = ((op & 0x03FFFFFF) << 2);
	u32 targetAddr = (js.compilerPC & 0xF0000000) | off;

	switch (op >> 26)
	{
	case 2: //j
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		WriteExit(targetAddr, 0);
		break;

	case 3: //jal
		//Break();
		// Link: RA = address of the instruction after the delay slot.
		gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
		MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		WriteExit(targetAddr, 0);
		break;

	default:
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
		break;
	}
	js.compiling = false;
}
// Compiles jr/jalr. The destination register is copied into FLAGREG before
// the delay slot runs, since the slot may overwrite rs.
void Jit::Comp_JumpReg(u32 op) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
		return;
	}
	int rs = _RS;

	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
	CONDITIONAL_NICE_DELAYSLOT;

	if (IsSyscall(delaySlotOp)) {
		// jr with a syscall in the delay slot: set PC first so the syscall
		// can return to the right place, then let the syscall emit the exit.
		gpr.MapReg(rs);
		PPCReg mRs = gpr.R(rs);
		MR(FLAGREG, mRs);
		MovToPC(FLAGREG); // For syscall to be able to return.
		CompileDelaySlot(DELAYSLOT_FLUSH);
		return; // Syscall wrote exit code.
	} else if (delaySlotIsNice) {
		CompileDelaySlot(DELAYSLOT_NICE);
		gpr.MapReg(rs);
		PPCReg mRs = gpr.R(rs);
		MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
		FlushAll();
	} else {
		// Delay slot
		gpr.MapReg(rs);
		PPCReg mRs = gpr.R(rs);
		MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
	}

	switch (op & 0x3f)
	{
	case 8: //jr
		break;
	case 9: //jalr
		// mips->reg = js.compilerPC + 8;
		//Break();
		// Link into RA directly in the MIPS context.
		MOVI2R(SREG, js.compilerPC + 8);
		STW(SREG, CTXREG, MIPS_REG_RA * 4);
		break;
	default:
		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
		break;
	}

	WriteExitDestInR(FLAGREG);
	js.compiling = false;
}
// Compiles a syscall: flushes all cached registers, updates the downcount,
// then calls the HLE CallSyscall handler with the raw opcode in R3.
void Jit::Comp_Syscall(u32 op) {
	FlushAll();

	// If we're in a delay slot, this is off by one.
	const int offset = js.inDelaySlot ? -1 : 0;
	WriteDownCount(offset);
	js.downcountAmount = -offset;

	// CallSyscall(op);
	MOVI2R(R3, op);
	SaveDowncount(DCNTREG); // the C++ callee may clobber DCNTREG
	QuickCallFunction((void *)&CallSyscall);
	RestoreDowncount(DCNTREG);

	WriteSyscallExit();
	js.compiling = false;
}
// break instruction: interpret it, then exit the block through the
// core-state-checking dispatcher path.
void Jit::Comp_Break(u32 op) {
	Comp_Generic(op);
	WriteSyscallExit();
	js.compiling = false;
}
}

View file

@ -0,0 +1,42 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
	// FPU ops have no PPC JIT implementations yet; every one falls back to
	// the interpreter via Comp_Generic.

	void Jit::Comp_FPULS(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_FPUComp(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_FPU3op(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_FPU2op(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_mxc1(u32 op) {
		Comp_Generic(op);
	}
}

View file

@ -0,0 +1,142 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
using namespace PpcGen;
namespace MIPSComp
{
// Computes r = (R(rs) + offset) & 0x3FFFFFFF — the PSP effective address
// masked into the emulated memory range, ready to index off BASEREG.
// Fix: the body previously hard-coded SREG as the destination, silently
// ignoring the 'r' parameter. It now honors 'r'; the only caller in this
// file passes SREG, so emitted code is unchanged there.
void Jit::SetRegToEffectiveAddress(PpcGen::PPCReg r, int rs, s16 offset) {
	if (offset) {
		ADDI(r, gpr.R(rs), offset);
		RLWINM(r, r, 0, 2, 31); // &= 0x3FFFFFFF
	} else {
		RLWINM(r, gpr.R(rs), 0, 2, 31); // &= 0x3FFFFFFF
	}
}
// Compiles MIPS loads/stores. The host is big-endian PPC while the emulated
// MIPS memory is little-endian, hence the byte-reversed LWBRX/LHBRX/STWBRX/
// STHBRX forms for 16/32-bit accesses. lwl/lwr+lwr/lwl pairs are fused into
// a single aligned lw/sw when possible; otherwise those ops are disabled.
// NOTE(review): 'doCheck' is declared but never used — presumably a leftover
// from a planned address-range check; confirm before removing.
void Jit::Comp_ITypeMem(u32 op) {
	CONDITIONAL_DISABLE;
	int offset = (signed short)(op&0xFFFF);
	bool load = false;
	int rt = _RT;
	int rs = _RS;
	int o = op>>26;
	if (((op >> 29) & 1) == 0 && rt == 0) {
		// Don't load anything into $zr
		return;
	}

	u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
	bool doCheck = false;

	switch (o)
	{
	case 32: //lb
	case 33: //lh
	case 35: //lw
	case 36: //lbu
	case 37: //lhu
		load = true;
		// fall through — address computation is shared with the stores below
	case 40: //sb
	case 41: //sh
	case 43: //sw
		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
			// We can compute the full address at compile time. Kickass.
			u32 addr = iaddr & 0x3FFFFFFF;
			// Must be OK even if rs == rt since we have the value from imm already.
			gpr.MapReg(rt, load ? MAP_NOINIT | MAP_DIRTY : 0);
			MOVI2R(SREG, addr);
		} else {
			_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
			// For a load rt is written (dirty); for a store both are inputs.
			load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
			SetRegToEffectiveAddress(SREG, rs, offset);
		}
		switch (o)
		{
		// Load
		case 32: //lb
			LBZX(gpr.R(rt), BASEREG, SREG);
			EXTSB(gpr.R(rt), gpr.R(rt)); // sign-extend the byte
			break;
		case 33: //lh
			LHBRX(gpr.R(rt), BASEREG, SREG);
			EXTSH(gpr.R(rt), gpr.R(rt)); // sign-extend the halfword
			break;
		case 35: //lw
			LWBRX(gpr.R(rt), BASEREG, SREG);
			break;
		case 36: //lbu
			LBZX (gpr.R(rt), BASEREG, SREG);
			break;
		case 37: //lhu
			LHBRX (gpr.R(rt), BASEREG, SREG);
			break;
		// Store
		case 40: //sb
			STBX (gpr.R(rt), BASEREG, SREG);
			break;
		case 41: //sh
			STHBRX(gpr.R(rt), BASEREG, SREG);
			break;
		case 43: //sw
			STWBRX(gpr.R(rt), BASEREG, SREG);
			break;
		}
		break;
	case 34: //lwl
	case 38: //lwr
		load = true;
		// fall through
	case 42: //swl
	case 46: //swr
		if (!js.inDelaySlot) {
			// Optimisation: Combine to single unaligned load/store
			bool isLeft = (o == 34 || o == 42);
			u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
			// Find a matching shift in opposite direction with opposite offset.
			if (nextOp == (isLeft ? (op + (4<<26) - 3)
				: (op - (4<<26) + 3)))
			{
				EatInstruction(nextOp);
				nextOp = ((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x3FFFFFF); //lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}
		DISABLE; // Disabled until crashes are resolved.
		break;
	default:
		Comp_Generic(op);
		return ;
	}
}
}

View file

@ -0,0 +1,139 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
namespace MIPSComp
{
	// No VFPU ops are implemented in the PPC JIT yet; all of them fall back
	// to the interpreter via Comp_Generic.

	void Jit::Comp_SV(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_SVQ(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VPFX(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VVectorInit(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VMatrixInit(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VDot(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VecDo3(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VV2Op(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Mftv(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vmtvc(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vmmov(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VScl(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vmmul(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vmscl(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vtfm(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VHdp(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VCrs(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VDet(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vi2x(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vx2i(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vf2i(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vi2f(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vcst(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vhoriz(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VRot(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_VIdt(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vcmp(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vcmov(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Viim(u32 op) {
		Comp_Generic(op);
	}

	void Jit::Comp_Vfim(u32 op) {
		Comp_Generic(op);
	}
}

235
Core/MIPS/PPC/PpcJit.cpp Normal file
View file

@ -0,0 +1,235 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
static u32 delaySlotFlagsValue;
// Compiles the instruction in the branch delay slot (at compilerPC + 4).
// DELAYSLOT_SAFE spills FLAGREG to static storage around the slot because
// the slot's code may clobber it; DELAYSLOT_FLUSH writes back all cached
// registers afterwards.
void Jit::CompileDelaySlot(int flags)
{
	// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
	// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
	// delay slot, we're screwed.
	if (flags & DELAYSLOT_SAFE) {
		// Save flags register
		MOVI2R(SREG, (u32)&delaySlotFlagsValue);
		STW(FLAGREG, SREG);
	}

	js.inDelaySlot = true;
	u32 op = Memory::Read_Instruction(js.compilerPC + 4);
	MIPSCompileOp(op);
	js.inDelaySlot = false;

	if (flags & DELAYSLOT_FLUSH)
		FlushAll();

	if (flags & DELAYSLOT_SAFE) {
		// Restore flags register
		MOVI2R(SREG, (u32)&delaySlotFlagsValue);
		LWZ(FLAGREG, SREG);
	}
}
// Compiles (or recompiles) the block starting at em_address and registers it
// in the block cache.
void Jit::Compile(u32 em_address)
{
	// Out of code space or block slots: wipe everything and start fresh.
	if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
	{
		ClearCache();
	}

	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	DoJit(em_address, b);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink);

	// Drat. The VFPU hit an uneaten prefix at the end of a block.
	if (js.startDefaultPrefix && js.MayHavePrefix())
	{
		js.startDefaultPrefix = false;
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();

		// Let's try that one more time. We won't get back here because we toggled the value.
		Compile(em_address);
	}
}
// Emits a load of the emulated MIPS PC from the context into host reg r.
void Jit::MovFromPC(PPCReg r) {
	LWZ(r, CTXREG, offsetof(MIPSState, pc));
}
// Emits a store of host reg r into the emulated MIPS PC in the context.
void Jit::MovToPC(PPCReg r) {
	STW(r, CTXREG, offsetof(MIPSState, pc));
}
// Spills the downcount register to the MIPS context (before C++ calls that
// may clobber it).
void Jit::SaveDowncount(PPCReg r) {
	STW(r, CTXREG, offsetof(MIPSState, downcount));
}
// Reloads the downcount register from the MIPS context (after C++ calls).
void Jit::RestoreDowncount(PPCReg r) {
	LWZ(r, CTXREG, offsetof(MIPSState, downcount));
}
// Debug helper: crashes if the downcount went negative. The call site in
// WriteDownCount is normally commented out.
static void ShowDownCount() {
	if (currentMIPS->downcount<0) {
		//ERROR_LOG(DYNA_REC, "MIPSState, downcount %08x", currentMIPS->downcount);
		Crash();
	}
}
// Emits code that subtracts the block's cycle estimate (plus 'offset') from
// the downcount, writes it back to the context, and leaves a signed compare
// against zero in CR0 for the dispatcher's conditional branch.
void Jit::WriteDownCount(int offset)
{
	// don't know if the result is correct
	int theDowncount = js.downcountAmount + offset;
	if (jo.downcountInRegister) {
		// DCNTREG = DCNTREG - theDowncount;
		MOVI2R(SREG, theDowncount);
		SUBF(DCNTREG, SREG, DCNTREG, 1);
		STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
	} else {
		// DCNTREG = MIPSState->downcount - theDowncount;
		MOVI2R(SREG, theDowncount);
		LWZ(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
		SUBF(DCNTREG, SREG, DCNTREG, 1);
		STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
	}

	//QuickCallFunction(ShowDownCount);

	CMPI(DCNTREG, 0);
}
// Generic fallback: flushes the register cache and emits a call into the
// single-instruction interpreter handler for this opcode.
void Jit::Comp_Generic(u32 op) {
	FlushAll();

	// basic jit !!
	MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
	if (func)
	{
		// Save mips PC and cycles
		SaveDowncount(DCNTREG);
		// call interpreted function
		MOVI2R(R3, op);
		QuickCallFunction((void *)func);
		// restore pc and cycles
		RestoreDowncount(DCNTREG);
	}
	// Might have eaten prefixes, hard to tell...
	if ((MIPSGetInfo(op) & IS_VFPU) != 0)
		js.PrefixStart();
}
// Skips the next instruction without compiling it here (used when two ops
// are fused into one, e.g. lwl+lwr), while still accounting its cycles.
void Jit::EatInstruction(u32 op) {
	u32 info = MIPSGetInfo(op);
	_dbg_assert_msg_(JIT, !(info & DELAYSLOT), "Never eat a branch op.");
	_dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot.");

	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
// Emuhack op that marks an already-compiled block; unreachable in practice.
void Jit::Comp_RunBlock(u32 op) {
	// This shouldn't be necessary, the dispatcher should catch us before we get here.
	ERROR_LOG(DYNA_REC, "Comp_RunBlock should never be reached!");
}
// Intentionally emits no code for this op.
void Jit::Comp_DoNothing(u32 op) {
}
// Writes all dirty cached GPRs back to the MIPS context. FPU/prefix flushing
// is not hooked up yet.
void Jit::FlushAll()
{
	gpr.FlushAll();
	//fpr.FlushAll();
	//FlushPrefixV();
}
// Drops every compiled block, resets the code space, and re-emits the fixed
// dispatcher/trampoline code.
void Jit::ClearCache() {
	blocks.Clear();
	ClearCodeSpace();
	GenerateFixedCode();
}
// Coarse invalidation: nukes the WHOLE cache rather than just the blocks
// covering em_address. Correct but expensive; refine later if needed.
void Jit::ClearCacheAt(u32 em_address) {
	ClearCache();
}
// Sets up the block cache, register cache, and code space, then emits the
// fixed dispatcher code.
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo),mips_(mips)
{
	blocks.Init();
	gpr.SetEmitter(this);
	AllocCodeSpace(1024 * 1024 * 16); // 16MB of code space. (The old comment claimed a 32MB ARM branch-range limit, but this is the PPC backend.)
	GenerateFixedCode();

	js.startDefaultPrefix = true;
}
// Enters the generated dispatcher loop; returns when it exits (e.g. when
// coreState stops being CORE_RUNNING).
void Jit::RunLoopUntil(u64 globalticks) {
#ifdef _XBOX
	// force stack alignment
	_alloca(8*1024);
#endif
	// Run the compiled code
	((void (*)())enterCode)();
}
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
// Emits a block exit to a statically known address. If the destination block
// is already compiled and block linking is enabled, branch straight into it;
// otherwise load the target PC and jump to the dispatcher.
void Jit::WriteExit(u32 destination, int exit_num)
{
	WriteDownCount();
	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;
	b->exitAddress[exit_num] = destination;
	b->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity!
	int block = blocks.GetBlockNumberFromStartAddress(destination);
	if (block >= 0 && jo.enableBlocklink) {
		// It exists! Joy of joy!
		B(blocks.GetBlock(block)->checkedEntry);
		b->linkStatus[exit_num] = true;
	} else {
		// NOTE: despite the name, dispatcherPCInR0 takes the PC in SREG here.
		MOVI2R(SREG, destination);
		B((const void *)dispatcherPCInR0);
	}
}
// Emits a block exit to a runtime-computed address held in Reg (jr/jalr):
// stores it to mips->pc and jumps to the dispatcher.
void Jit::WriteExitDestInR(PPCReg Reg)
{
	//Break();
	MovToPC(Reg);
	WriteDownCount();
	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}
// Emits the exit used after syscalls/breaks: goes through the dispatcher
// variant that also re-checks coreState.
void Jit::WriteSyscallExit()
{
	WriteDownCount();
	B((const void *)dispatcherCheckCoreState);
}
}

284
Core/MIPS/PPC/PpcJit.h Normal file
View file

@ -0,0 +1,284 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "../../../Globals.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Core/MIPS/PPC/PpcRegCache.h"
#include "Core/MIPS/MIPS.h"
#include <ppcEmitter.h>
namespace MIPSComp
{
// Tunable options for the PPC JIT backend. Both optimizations default to on.
struct PpcJitOptions
{
	PpcJitOptions()
		: enableBlocklink(true),
		  downcountInRegister(true)
	{
	}

	// Branch directly between compiled blocks when the target is known.
	bool enableBlocklink;
	// Keep the downcount cached in a host register instead of memory.
	bool downcountInRegister;
};
// Per-translation state for the PPC JIT, including tracking of the VFPU
// prefix registers (S/T/D) so redundant prefix handling can be elided.
struct PpcJitState
{
	enum PrefixState
	{
		PREFIX_UNKNOWN = 0x00,
		PREFIX_KNOWN = 0x01,
		PREFIX_DIRTY = 0x10,
		PREFIX_KNOWN_DIRTY = 0x11,
	};

	u32 compilerPC;
	u32 blockStart;
	bool cancel;
	bool inDelaySlot;
	int downcountAmount;
	bool compiling;	// TODO: get rid of this in favor of using analysis results to determine end of block
	JitBlock *curBlock;

	// VFPU prefix magic
	bool startDefaultPrefix;
	u32 prefixS;
	u32 prefixT;
	u32 prefixD;
	PrefixState prefixSFlag;
	PrefixState prefixTFlag;
	PrefixState prefixDFlag;

	// First pass assumes default prefixes; once that assumption fails,
	// startDefaultPrefix is cleared and prefixes are tracked pessimistically.
	void PrefixStart() {
		if (startDefaultPrefix)
			EatPrefix();
		else
			PrefixUnknown();
	}

	// Forget everything known about the prefix registers.
	void PrefixUnknown() {
		prefixSFlag = PREFIX_UNKNOWN;
		prefixTFlag = PREFIX_UNKNOWN;
		prefixDFlag = PREFIX_UNKNOWN;
	}

	// True when any prefix might be in a non-default state. Short-circuits so
	// VfpuWriteMask() is only consulted once all prefixes are known.
	bool MayHavePrefix() const {
		return HasUnknownPrefix()
			|| prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0
			|| VfpuWriteMask() != 0;
	}

	// True when at least one prefix register's value is untracked.
	bool HasUnknownPrefix() const {
		return !(prefixSFlag & PREFIX_KNOWN)
			|| !(prefixTFlag & PREFIX_KNOWN)
			|| !(prefixDFlag & PREFIX_KNOWN);
	}

	// True when all three prefixes are known to hold default values
	// (0xE4 is the identity swizzle for S/T; 0 for D).
	bool HasNoPrefix() const {
		return (prefixDFlag & PREFIX_KNOWN) && (prefixSFlag & PREFIX_KNOWN) && (prefixTFlag & PREFIX_KNOWN) && (prefixS == 0xE4 && prefixT == 0xE4 && prefixD == 0);
	}

	// Force every prefix back to its default value, marking any that changed
	// (or were unknown) as dirty.
	void EatPrefix() {
		if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) {
			prefixSFlag = PREFIX_KNOWN_DIRTY;
			prefixS = 0xE4;
		}
		if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) {
			prefixTFlag = PREFIX_KNOWN_DIRTY;
			prefixT = 0xE4;
		}
		if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || VfpuWriteMask() != 0) {
			prefixDFlag = PREFIX_KNOWN_DIRTY;
			prefixD = 0x0;
		}
	}

	// The 4-bit write mask encoded in the D prefix; only valid once known.
	u8 VfpuWriteMask() const {
		_assert_(prefixDFlag & PREFIX_KNOWN);
		return (prefixD >> 8) & 0xF;
	}

	// Single-lane write-mask test.
	bool VfpuWriteMask(int i) const {
		_assert_(prefixDFlag & PREFIX_KNOWN);
		return (prefixD >> (8 + i)) & 1;
	}
};
// Flags controlling how Jit::CompileDelaySlot treats the slot instruction.
enum CompileDelaySlotFlags
{
	// Easy, nothing extra.
	DELAYSLOT_NICE = 0,
	// Flush registers after delay slot.
	DELAYSLOT_FLUSH = 1,
	// Preserve flags (FLAGREG is spilled/restored around the slot).
	DELAYSLOT_SAFE = 2,
	// Flush registers after and preserve flags.
	DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE,
};
// PPC JIT compiler for the emulated MIPS CPU: translates MIPS basic blocks
// into PPC code (via PPCXCodeBlock) and caches them in 'blocks'.
class Jit: public PpcGen::PPCXCodeBlock
{
protected:
	JitBlockCache blocks;
public:
	Jit(MIPSState *mips);

	// Compiled ops should ignore delay slots
	// the compiler will take care of them by itself
	// OR NOT

	void Comp_Generic(u32 op);
	void EatInstruction(u32 op);
	void Comp_RunBlock(u32 op);

	// TODO: Eat VFPU prefixes here.
	void EatPrefix() { }

	// Opcode compilers — one per MIPS instruction group.
	void Comp_ITypeMem(u32 op);

	void Comp_RelBranch(u32 op);
	void Comp_RelBranchRI(u32 op);
	void Comp_FPUBranch(u32 op);
	void Comp_FPULS(u32 op);
	void Comp_FPUComp(u32 op);
	void Comp_Jump(u32 op);
	void Comp_JumpReg(u32 op);
	void Comp_Syscall(u32 op);
	void Comp_Break(u32 op);

	void Comp_IType(u32 op);
	void Comp_RType2(u32 op);
	void Comp_RType3(u32 op);
	void Comp_ShiftType(u32 op);
	void Comp_Allegrex(u32 op);
	void Comp_Allegrex2(u32 op);
	void Comp_VBranch(u32 op);
	void Comp_MulDivType(u32 op);
	void Comp_Special3(u32 op);

	void Comp_FPU3op(u32 op);
	void Comp_FPU2op(u32 op);
	void Comp_mxc1(u32 op);

	void Comp_DoNothing(u32 op);

	// VFPU ops (currently all interpreter fallbacks).
	void Comp_SV(u32 op);
	void Comp_SVQ(u32 op);
	void Comp_VPFX(u32 op);
	void Comp_VVectorInit(u32 op);
	void Comp_VMatrixInit(u32 op);
	void Comp_VDot(u32 op);
	void Comp_VecDo3(u32 op);
	void Comp_VV2Op(u32 op);
	void Comp_Mftv(u32 op);
	void Comp_Vmtvc(u32 op);
	void Comp_Vmmov(u32 op);
	void Comp_VScl(u32 op);
	void Comp_Vmmul(u32 op);
	void Comp_Vmscl(u32 op);
	void Comp_Vtfm(u32 op);
	void Comp_VHdp(u32 op);
	void Comp_VCrs(u32 op);
	void Comp_VDet(u32 op);
	void Comp_Vi2x(u32 op);
	void Comp_Vx2i(u32 op);
	void Comp_Vf2i(u32 op);
	void Comp_Vi2f(u32 op);
	void Comp_Vcst(u32 op);
	void Comp_Vhoriz(u32 op);
	void Comp_VRot(u32 op);
	void Comp_VIdt(u32 op);
	void Comp_Vcmp(u32 op);
	void Comp_Vcmov(u32 op);
	void Comp_Viim(u32 op);
	void Comp_Vfim(u32 op);

	// Utility compilation functions
	void BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
	void BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
	void BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely);
	void BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely);
	void SetRegToEffectiveAddress(PpcGen::PPCReg r, int rs, s16 offset);

	// Utilities to reduce duplicated code
	void CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b));
	void CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arithOp2)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub = false);

	// flush regs
	void FlushAll();

	void WriteDownCount(int offset = 0);
	void MovFromPC(PpcGen::PPCReg r);
	void MovToPC(PpcGen::PPCReg r);

	void SaveDowncount(PpcGen::PPCReg r);
	void RestoreDowncount(PpcGen::PPCReg r);

	// Block exit emitters.
	void WriteExit(u32 destination, int exit_num);
	void WriteExitDestInR(PPCReg Reg);
	void WriteSyscallExit();

	void ClearCache();
	void ClearCacheAt(u32 em_address);

	void RunLoopUntil(u64 globalticks);
	void GenerateFixedCode();
	void DumpJit();

	void CompileDelaySlot(int flags);
	void Compile(u32 em_address);	// Compiles a block at current MIPS PC
	const u8 *DoJit(u32 em_address, JitBlock *b);

	PpcJitOptions jo;
	PpcJitState js;

	PpcRegCache gpr;
	//PpcRegCacheFPU fpr;

	MIPSState *mips_;

	JitBlockCache *GetBlockCache() { return &blocks; }

public:
	// Code pointers into the fixed (dispatcher) code, set by GenerateFixedCode.
	const u8 *enterCode;

	const u8 *outerLoop;
	const u8 *outerLoopPCInR0;
	const u8 *dispatcherCheckCoreState;
	const u8 *dispatcherPCInR0;	// NOTE: actually expects the PC in SREG (see WriteExit).
	const u8 *dispatcher;
	const u8 *dispatcherNoCheck;

	const u8 *breakpointBailout;
};
typedef void (Jit::*MIPSCompileFunc)(u32 opcode);
} // namespace MIPSComp

View file

@ -0,0 +1,313 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <PpcEmitter.h>
#include "PpcRegCache.h"
#include "PpcJit.h"
using namespace PpcGen;
// Binds the cache to the CPU state and jit options. No register state is set
// up here; callers must invoke Start() before the first MapReg().
PpcRegCache::PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options) : mips_(mips), options_(options) {
}
// Hands the cache the emitter it will use to generate the load/store code
// (LWZ/STW/MOVI2R) emitted as a side effect of mapping and flushing.
void PpcRegCache::Init(PPCXEmitter *emitter) {
	emit_ = emitter;
}
// Resets all mapping state at the start of a block: no host register holds
// a MIPS register, and every MIPS register is considered memory-resident.
void PpcRegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
	for (int reg = 0; reg < NUM_PPCREG; ++reg) {
		ar[reg].mipsReg = -1;
		ar[reg].isDirty = false;
	}
	for (int reg = 0; reg < NUM_MIPSREG; ++reg) {
		mr[reg].loc = ML_MEM;
		mr[reg].reg = INVALID_REG;
		mr[reg].imm = -1;
		mr[reg].spillLock = false;
	}
}
// Returns the ordered list of host PPC registers available for MIPS register
// allocation, and its length in 'count'.
// Fix: the element count was computed as sizeof(array) / sizeof(const int),
// which silently assumes the PPCReg enum has the size of int. Divide by the
// size of an actual element instead, so a change to the enum's underlying
// type cannot corrupt the count.
const PPCReg *PpcRegCache::GetMIPSAllocationOrder(int &count) {
	// Note that R0 is reserved as scratch for now.
	// R1 could be used as it's only used for scratch outside "regalloc space" now.
	// R12 is also potentially usable.
	// R4-R7 are registers we could use for static allocation or downcount.
	// R8 is used to preserve flags in nasty branches.
	// R9 and upwards are reserved for jit basics.
	// NOTE(review): both branches currently return identical orders (R14-R18
	// are commented out in each); kept separate in case they diverge later.
	if (options_->downcountInRegister) {
		static const PPCReg allocationOrder[] = {
			/*R14, R15, R16, R17, R18, */R19,
			R20, R21, R22, R23, R24, R25,
			R26, R27, R28, R29, R30, R31,
		};
		count = sizeof(allocationOrder) / sizeof(allocationOrder[0]);
		return allocationOrder;
	} else {
		static const PPCReg allocationOrder2[] = {
			/*R14, R15, R16, R17, R18,*/ R19,
			R20, R21, R22, R23, R24, R25,
			R26, R27, R28, R29, R30, R31,
		};
		count = sizeof(allocationOrder2) / sizeof(allocationOrder2[0]);
		return allocationOrder2;
	}
}
// Flush registers that a C calling convention call would clobber.
// NOTE(review): currently a no-op - the flushes below are disabled.
// Presumably safe because allocation only uses callee-saved R19-R31
// (see GetMIPSAllocationOrder); confirm against the target ABI.
void PpcRegCache::FlushBeforeCall() {
	// R4-R11 are preserved. Others need flushing.
	/*
	FlushPpcReg(R2);
	FlushPpcReg(R3);
	FlushPpcReg(R12);
	*/
}
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.
// Returns a host PPC register holding the given MIPS register, allocating one
// (and emitting a load from the context, or a MOVI2R for immediates/r0) or
// spilling an unlocked register if necessary. MAP_DIRTY marks the result as
// needing write-back; MAP_NOINIT skips loading the old value.
PPCReg PpcRegCache::MapReg(MIPSReg mipsReg, int mapFlags) {
	// Let's see if it's already mapped. If so we just need to update the dirty flag.
	// We don't need to check for ML_NOINIT because we assume that anyone who maps
	// with that flag immediately writes a "known" value to the register.
	if (mr[mipsReg].loc == ML_PPCREG) {
		if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
			ERROR_LOG(HLE, "Register mapping out of sync! %i", mipsReg);
		}
		if (mapFlags & MAP_DIRTY) {
			ar[mr[mipsReg].reg].isDirty = true;
		}
		return (PPCReg)mr[mipsReg].reg;
	}

	// Okay, not mapped, so we need to allocate a PPC register.
	int allocCount;
	const PPCReg *allocOrder = GetMIPSAllocationOrder(allocCount);

allocate:
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i];
		if (ar[reg].mipsReg == -1) {
			// That means it's free. Grab it, and load the value into it (if requested).
			ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
			if (!(mapFlags & MAP_NOINIT)) {
				if (mr[mipsReg].loc == ML_MEM) {
					if (mipsReg != 0) {
						emit_->LWZ((PPCReg)reg, CTXREG, GetMipsRegOffset(mipsReg));
					} else {
						// If we get a request to load the zero register, at least we won't spend
						// time on a memory access...
						emit_->MOVI2R((PPCReg)reg, 0);
					}
				} else if (mr[mipsReg].loc == ML_IMM) {
					emit_->MOVI2R((PPCReg)reg, mr[mipsReg].imm);
					ar[reg].isDirty = true;  // IMM is always dirty.
				}
			}
			ar[reg].mipsReg = mipsReg;
			mr[mipsReg].loc = ML_PPCREG;
			mr[mipsReg].reg = (PPCReg)reg;
			return (PPCReg)reg;
		}
	}

	// Still nothing. Let's spill a reg and goto 10 (i.e. retry the allocation loop).
	// TODO: Use age or something to choose which register to spill?
	// TODO: Spill dirty regs first? or opposite?
	int bestToSpill = -1;
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i];
		// Spill-locked registers must stay mapped for the current instruction.
		if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock)
			continue;
		bestToSpill = reg;
		break;
	}

	if (bestToSpill != -1) {
		// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
		FlushPpcReg((PPCReg)bestToSpill);
		goto allocate;
	}

	// Uh oh, we have all them spilllocked....
	ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
	return INVALID_REG;
}
// Maps two source registers, spill-locking both so that mapping the second
// cannot evict the first.
void PpcRegCache::MapInIn(MIPSReg rd, MIPSReg rs) {
	SpillLock(rd, rs);
	MapReg(rd);
	MapReg(rs);
	ReleaseSpillLocks();
}
// Maps a destination (dirty) and a source register. With avoidLoad, rd's old
// value is not loaded from memory - unless rd aliases rs, where it is still live.
void PpcRegCache::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) {
	SpillLock(rd, rs);
	bool load = !avoidLoad || rd == rs;
	MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT));
	MapReg(rs);
	ReleaseSpillLocks();
}
// Maps a destination (dirty) and two source registers. rd's old value is only
// loaded when it aliases one of the sources (or avoidLoad is false).
void PpcRegCache::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
	SpillLock(rd, rs, rt);
	bool load = !avoidLoad || (rd == rs || rd == rt);
	MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT));
	MapReg(rt);
	MapReg(rs);
	ReleaseSpillLocks();
}
// Maps two destinations (dirty) and two sources (e.g. for mult/div writing
// HI/LO). Each destination's old value is loaded only if it aliases a source.
void PpcRegCache::MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
	SpillLock(rd1, rd2, rs, rt);
	bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt);
	bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt);
	MapReg(rd1, MAP_DIRTY | (load1 ? 0 : MAP_NOINIT));
	MapReg(rd2, MAP_DIRTY | (load2 ? 0 : MAP_NOINIT));
	MapReg(rt);
	MapReg(rs);
	ReleaseSpillLocks();
}
// Writes a host PPC register's value back to the MIPS context (if dirty) and
// unmaps it, marking the MIPS register as memory-resident.
// Fix: removed the unreachable `else { ERROR_LOG("Dirty but no mipsreg?"); }`
// branch - after the early return above, ar[r].mipsReg != -1 always holds,
// so the second check and its else arm were dead code.
void PpcRegCache::FlushPpcReg(PPCReg r) {
	if (ar[r].mipsReg == -1) {
		// Nothing to do, reg not mapped.
		return;
	}
	// Only store back if the value was actually modified while mapped.
	if (ar[r].isDirty && mr[ar[r].mipsReg].loc == ML_PPCREG)
		emit_->STW(r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
	// IMMs won't be in a PPC reg.
	mr[ar[r].mipsReg].loc = ML_MEM;
	mr[ar[r].mipsReg].reg = INVALID_REG;
	mr[ar[r].mipsReg].imm = 0;
	ar[r].isDirty = false;
	ar[r].mipsReg = -1;
}
// Writes a single MIPS register back to the context struct (if needed),
// frees any host register it occupied, and marks it memory-resident.
void PpcRegCache::FlushR(MIPSReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is always "dirty": materialize via scratch and store to the context.
		emit_->MOVI2R(SREG, mr[r].imm);
		emit_->STW(SREG, CTXREG, GetMipsRegOffset(r));
		break;

	case ML_PPCREG:
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad PpcReg");
		}
		if (ar[mr[r].reg].isDirty) {
			emit_->STW((PPCReg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
			ar[mr[r].reg].isDirty = false;
		}
		// Detach the host register either way.
		ar[mr[r].reg].mipsReg = -1;
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		//BAD
		break;
	}
	// In all cases the register now lives only in memory.
	mr[r].loc = ML_MEM;
	mr[r].reg = INVALID_REG;
	mr[r].imm = 0;
}
void PpcRegCache::FlushAll() {
for (int i = 0; i < NUM_MIPSREG; i++) {
FlushR(i);
}
// Sanity check
for (int i = 0; i < NUM_PPCREG; i++) {
if (ar[i].mipsReg != -1) {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
}
// Records that a MIPS register now holds a known constant, detaching any
// host register that previously cached it (the imm supersedes it).
void PpcRegCache::SetImm(MIPSReg r, u32 immVal) {
	if (r == 0)
		ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);

	if (mr[r].loc == ML_PPCREG) {
		// Zap existing value if cached in a reg.
		RegPPC &host = ar[mr[r].reg];
		host.mipsReg = -1;
		host.isDirty = false;
	}
	mr[r].loc = ML_IMM;
	mr[r].imm = immVal;
	mr[r].reg = INVALID_REG;
}
// True if the register's value is a known constant. MIPS r0 is
// architecturally always zero, hence always an immediate.
bool PpcRegCache::IsImm(MIPSReg r) const {
	return r == 0 || mr[r].loc == ML_IMM;
}
// Returns the known constant value of a register. r0 is always 0; calling
// this on a non-imm register logs an error and returns stale data.
u32 PpcRegCache::GetImm(MIPSReg r) const {
	if (r == 0)
		return 0;
	if (mr[r].loc != ML_IMM)
		ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
	return mr[r].imm;
}
// Byte offset of a MIPS register inside MIPSState, for CTXREG-relative
// loads/stores. GPRs 0-31 form a flat u32 array at the start; HI/LO are
// named fields.
int PpcRegCache::GetMipsRegOffset(MIPSReg r) {
	if (r < 32)
		return r * 4;
	if (r == MIPSREG_HI)
		return offsetof(MIPSState, hi);
	if (r == MIPSREG_LO)
		return offsetof(MIPSState, lo);
	ERROR_LOG(JIT, "bad mips register %i", r);
	return 0; // or what?
}
// Pins up to four MIPS registers so MapReg cannot spill them while the
// current instruction is being compiled. r1 is mandatory; the rest default
// to -1 meaning "none".
void PpcRegCache::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
	mr[r1].spillLock = true;
	const MIPSReg rest[3] = { r2, r3, r4 };
	for (int i = 0; i < 3; ++i) {
		if (rest[i] != -1)
			mr[rest[i]].spillLock = true;
	}
}
void PpcRegCache::ReleaseSpillLocks() {
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].spillLock = false;
}
}
// Unlocks a single register previously pinned with SpillLock().
void PpcRegCache::ReleaseSpillLock(MIPSReg reg) {
	mr[reg].spillLock = false;
}
// Returns the host register currently caching the given MIPS register.
// Must only be called after MapReg(); logs and returns INVALID_REG otherwise.
PPCReg PpcRegCache::R(int mipsReg) {
	if (mr[mipsReg].loc != ML_PPCREG) {
		ERROR_LOG(JIT, "Reg %i not in ppc reg. compilerPC = %08x", mipsReg, compilerPC_);
		return INVALID_REG; // BAAAD
	}
	return (PPCReg)mr[mipsReg].reg;
}

156
Core/MIPS/PPC/PpcRegCache.h Normal file
View file

@ -0,0 +1,156 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
/**
PPC reg cache based on arm version
**/
#pragma once
#include "../MIPS.h"
#include "../MIPSAnalyst.h"
#include "ppcEmitter.h"
using namespace PpcGen;
// R2 to R8: mapped MIPS regs
// R9 = code pointers
// R10 = MIPS context
// R11 = base pointer
// R18 to R31: mapped MIPS regs
// R14 = MIPS context
// R15 = downcount register
// R16 = code pointer
// R17 = base pointer
#if 1
#define CTXREG (R14)
#define DCNTREG (R15)
#define CODEREG (R16)
#define BASEREG (R17)
#else
#define CTXREG (R6)
#define DCNTREG (R7)
#define CODEREG (R8)
#define BASEREG (R9)
#endif
// Safe to use this as scratch regs ?
#define SREG (R5)
#define FLAGREG (R18)
// Special MIPS registers:
enum {
MIPSREG_HI = 32,
MIPSREG_LO = 33,
TOTAL_MAPPABLE_MIPSREGS = 34,
};
typedef int MIPSReg;

// State of one host (PPC) register: which MIPS register it currently holds.
struct RegPPC {
	int mipsReg;  // if -1, no mipsreg attached.
	bool isDirty;  // Should the register be written back?
};

// Where a MIPS register's current value lives.
enum RegMIPSLoc {
	ML_IMM,     // known constant, not held in any host register
	ML_PPCREG,  // held in a host PPC register
	ML_MEM,     // only in the MIPSState context structure
};

struct RegMIPS {
	// Where is this MIPS register?
	RegMIPSLoc loc;
	// Data (only one of these is used, depending on loc. Could make a union).
	u32 imm;
	PPCReg reg;  // reg index
	bool spillLock;  // if true, this register cannot be spilled.
	// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
#undef MAP_DIRTY
#undef MAP_NOINIT
// Initing is the default so the flag is reversed.
enum {
MAP_DIRTY = 1,
MAP_NOINIT = 2,
};
namespace MIPSComp {
struct PpcJitOptions;
}
// Register cache mapping MIPS guest registers onto host PPC registers,
// modeled on the ARM version.
// Fix: ar[] is indexed by host PPC register number everywhere (all loops
// over it are bounded by NUM_PPCREG), but was declared with NUM_MIPSREG
// entries - a wrong, over-sized dimension. Declared with NUM_PPCREG.
class PpcRegCache
{
public:
	PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options);
	~PpcRegCache() {}

	void Init(PPCXEmitter *emitter);
	void Start(MIPSAnalyst::AnalysisResults &stats);

	// Protect the PPC register containing a MIPS register from spilling, to ensure that
	// it's being kept allocated.
	void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
	void ReleaseSpillLock(MIPSReg reg);
	void ReleaseSpillLocks();

	void SetImm(MIPSReg reg, u32 immVal);
	bool IsImm(MIPSReg reg) const;
	u32 GetImm(MIPSReg reg) const;

	// Returns a PPC register containing the requested MIPS register.
	PPCReg MapReg(MIPSReg reg, int mapFlags = 0);
	void MapInIn(MIPSReg rd, MIPSReg rs);
	void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
	void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
	void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
	void FlushPpcReg(PPCReg r);
	void FlushR(MIPSReg r);
	void FlushBeforeCall();
	void FlushAll();

	PPCReg R(int preg); // Returns a cached register

	void SetEmitter(PPCXEmitter *emitter) { emit_ = emitter; }

	// For better log output only.
	void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }

	int GetMipsRegOffset(MIPSReg r);

private:
	const PPCReg *GetMIPSAllocationOrder(int &count);

	MIPSState *mips_;
	MIPSComp::PpcJitOptions *options_;
	PPCXEmitter *emit_;
	u32 compilerPC_;

	enum {
		NUM_PPCREG = 32,
		NUM_MIPSREG = TOTAL_MAPPABLE_MIPSREGS,
	};

	RegPPC ar[NUM_PPCREG];    // indexed by host PPC register number
	RegMIPS mr[NUM_MIPSREG];  // indexed by MIPS register number
};

View file

@ -176,7 +176,7 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (rt == 0)
if (gpr.IsImmediate(rt) && gpr.GetImmediate32(rt) == 0)
{
gpr.KillImmediate(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));

View file

@ -136,23 +136,23 @@ void WriteUnchecked_U32(const u32 _Data, const u32 _Address);
#else
inline u32 ReadUnchecked_U32(const u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
return (*(u32 *)(base + (address & MEMVIEW32_MASK)));
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
return *(u32_le *)(base + (address & MEMVIEW32_MASK));
#else
return (*(u32 *)(base + address));
return *(u32_le *)(base + address);
#endif
}
inline u16 ReadUnchecked_U16(const u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
return (*(u16 *)(base + (address & MEMVIEW32_MASK)));
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
return *(u16_le *)(base + (address & MEMVIEW32_MASK));
#else
return (*(u16 *)(base + address));
return *(u16_le *)(base + address);
#endif
}
inline u8 ReadUnchecked_U8(const u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
return (*(u8 *)(base + (address & MEMVIEW32_MASK)));
#else
return (*(u8 *)(base + address));
@ -160,23 +160,23 @@ inline u8 ReadUnchecked_U8(const u32 address) {
}
inline void WriteUnchecked_U32(u32 data, u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
(*(u32 *)(base + (address & MEMVIEW32_MASK))) = data;
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
*(u32_le *)(base + (address & MEMVIEW32_MASK)) = data;
#else
(*(u32 *)(base + address)) = data;
*(u32_le *)(base + address) = data;
#endif
}
inline void WriteUnchecked_U16(u16 data, u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
(*(u16 *)(base + (address & MEMVIEW32_MASK))) = data;
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
*(u16_le *)(base + (address & MEMVIEW32_MASK)) = data;
#else
(*(u16 *)(base + address)) = data;
*(u16_le *)(base + address) = data;
#endif
}
inline void WriteUnchecked_U8(u8 data, u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
(*(u8 *)(base + (address & MEMVIEW32_MASK))) = data;
#else
(*(u8 *)(base + address)) = data;

View file

@ -156,22 +156,22 @@ u8 Read_U8(const u32 _Address)
u16 Read_U16(const u32 _Address)
{
u16 _var = 0;
ReadFromHardware<u16>(_var, _Address);
u16_le _var = 0;
ReadFromHardware<u16_le>(_var, _Address);
return (u16)_var;
}
u32 Read_U32(const u32 _Address)
{
u32 _var = 0;
ReadFromHardware<u32>(_var, _Address);
u32_le _var = 0;
ReadFromHardware<u32_le>(_var, _Address);
return _var;
}
u64 Read_U64(const u32 _Address)
{
u64 _var = 0;
ReadFromHardware<u64>(_var, _Address);
u64_le _var = 0;
ReadFromHardware<u64_le>(_var, _Address);
return _var;
}
@ -190,20 +190,19 @@ void Write_U8(const u8 _Data, const u32 _Address)
WriteToHardware<u8>(_Address, _Data);
}
void Write_U16(const u16 _Data, const u32 _Address)
{
WriteToHardware<u16>(_Address, _Data);
WriteToHardware<u16_le>(_Address, _Data);
}
void Write_U32(const u32 _Data, const u32 _Address)
{
WriteToHardware<u32>(_Address, _Data);
WriteToHardware<u32_le>(_Address, _Data);
}
void Write_U64(const u64 _Data, const u32 _Address)
{
WriteToHardware<u64>(_Address, _Data);
WriteToHardware<u64_le>(_Address, _Data);
}
#ifdef SAFE_MEMORY
@ -217,15 +216,15 @@ u8 ReadUnchecked_U8(const u32 _Address)
u16 ReadUnchecked_U16(const u32 _Address)
{
u16 _var = 0;
ReadFromHardware<u16>(_var, _Address);
u16_le _var = 0;
ReadFromHardware<u16_le>(_var, _Address);
return _var;
}
u32 ReadUnchecked_U32(const u32 _Address)
{
u32 _var = 0;
ReadFromHardware<u32>(_var, _Address);
u32_le _var = 0;
ReadFromHardware<u32_le>(_var, _Address);
return _var;
}
@ -236,12 +235,12 @@ void WriteUnchecked_U8(const u8 _iValue, const u32 _Address)
void WriteUnchecked_U16(const u16 _iValue, const u32 _Address)
{
WriteToHardware<u16>(_Address, _iValue);
WriteToHardware<u16_le>(_Address, _iValue);
}
void WriteUnchecked_U32(const u32 _iValue, const u32 _Address)
{
WriteToHardware<u32>(_Address, _iValue);
WriteToHardware<u32_le>(_Address, _iValue);
}
#endif

View file

@ -26,6 +26,7 @@
#include "GPU/GLES/Framebuffer.h"
#include "Core/Config.h"
#include "ext/xxhash.h"
#include "native/ext/cityhash/city.h"
#ifdef _M_SSE
@ -892,9 +893,7 @@ void TextureCache::UpdateCurrentClut() {
// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
const u32 clutExtendedBytes = clutTotalBytes_ + clutBaseBytes;
// QuickClutHash is not quite good enough apparently.
// clutHash_ = QuickClutHash((const u8 *)clutBufRaw_, clutExtendedBytes);
clutHash_ = CityHash32((const char *)clutBufRaw_, clutExtendedBytes);
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
// Avoid a copy when we don't need to convert colors.
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {

View file

@ -27,6 +27,7 @@
#include "native/gfx_es2/gl_state.h"
#include "native/ext/cityhash/city.h"
#include "ext/xxhash.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
@ -1034,14 +1035,14 @@ u32 TransformDrawEngine::ComputeHash() {
// It is really very expensive to check all the vertex data so often.
for (int i = 0; i < numDrawCalls; i++) {
if (!drawCalls[i].inds) {
fullhash += CityHash32((const char *)drawCalls[i].verts, vertexSize * drawCalls[i].vertexCount);
fullhash += XXH32((const char *)drawCalls[i].verts, vertexSize * drawCalls[i].vertexCount, 0x1DE8CAC4);
} else {
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
// we do when drawing.
fullhash += CityHash32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound));
fullhash += XXH32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound), 0x029F3EE1);
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
fullhash += CityHash32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount);
fullhash += XXH32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount, 0x955FD1CA);
}
}

View file

@ -50,6 +50,7 @@ SOURCES += ../Core/*.cpp \ # Core
../GPU/Null/NullGpu.cpp \
../GPU/GLES/*.cpp \
../ext/libkirk/*.c \ # Kirk
../ext/xxhash.c \ # xxHash
../ext/xbrz/*.cpp # XBRZ
HEADERS += ../Core/*.h \

View file

@ -382,7 +382,7 @@ void GameBrowser::Refresh() {
b->OnHoldClick.Handle(this, &GameBrowser::GameButtonHoldClick);
}
if (!lastText_.empty()) {
if (!lastText_.empty() && gameButtons.empty()) {
Add(new Spacer());
Add(new Choice(lastText_, new UI::LinearLayoutParams(UI::WRAP_CONTENT, UI::WRAP_CONTENT)))->OnClick.Handle(this, &GameBrowser::LastClick);
}

View file

@ -360,6 +360,9 @@ void NativeInit(int argc, const char *argv[],
logman->SetLogLevel(LogTypes::G3D, LogTypes::LERROR);
INFO_LOG(BOOT, "Logger inited.");
#else
if (g_Config.currentDirectory.empty()) {
g_Config.currentDirectory = File::GetExeDirectory();
}
g_Config.memCardDirectory = "MemStick/";
#endif

View file

@ -153,6 +153,7 @@ LOCAL_SRC_FILES := \
$(SRC)/ext/snappy/snappy-c.cpp \
$(SRC)/ext/snappy/snappy.cpp \
$(SRC)/ext/xbrz/xbrz.cpp \
$(SRC)/ext/xxhash.c \
$(SRC)/Common/Crypto/md5.cpp \
$(SRC)/Common/KeyMap.cpp \
$(SRC)/Common/LogManager.cpp \

475
ext/xxhash.c Normal file
View file

@ -0,0 +1,475 @@
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
*/
//**************************************
// Tuning parameters
//**************************************
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
//# define XXH_USE_UNALIGNED_ACCESS 1
#endif
// XXH_ACCEPT_NULL_INPUT_POINTER :
// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
// This option has a very small performance cost (only measurable on small inputs).
// By default, this option is disabled. To enable it, uncomment below define :
//#define XXH_ACCEPT_NULL_INPUT_POINTER 1
// XXH_FORCE_NATIVE_FORMAT :
// By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
// Results are therefore identical for little-endian and big-endian CPU.
// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
// Should endian-independance be of no importance for your application, you may set the #define below to 1.
// It will improve speed for Big-endian CPU.
// This option has no impact on Little_Endian CPU.
#define XXH_FORCE_NATIVE_FORMAT 1
//**************************************
// Compiler Specific Options
//**************************************
// Disable some Visual warning messages
#ifdef _MSC_VER // Visual Studio
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
#endif
#ifdef _MSC_VER // Visual Studio
# define forceinline static __forceinline
#else
# ifdef __GNUC__
# define forceinline static inline __attribute__((always_inline))
# else
# define forceinline static inline
# endif
#endif
//**************************************
// Includes & Memory related functions
//**************************************
#include "xxhash.h"
// Modify the local functions below should you wish to use some other memory related routines
// for malloc(), free()
#include <stdlib.h>
// Thin wrappers so embedders can swap allocators in one place.
forceinline void* XXH_malloc(size_t s) { return malloc(s); }
forceinline void XXH_free (void* p) { free(p); }
// for memcpy()
#include <string.h>
forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
# define _PACKED __attribute__ ((packed))
#else
# define _PACKED
#endif
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# ifdef __IBMC__
# pragma pack(1)
# else
# pragma pack(push, 1)
# endif
#endif
typedef struct _U32_S { U32 v; } _PACKED U32_S;
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# pragma pack(pop)
#endif
#define A32(x) (((U32_S *)(x))->v)
//***************************************
// Compiler-specific Functions and Macros
//***************************************
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
#if defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
#if defined(_MSC_VER) // Visual Studio
# define XXH_swap32 _byteswap_ulong
#elif GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
#else
static inline U32 XXH_swap32 (U32 x) {
return ((x << 24) & 0xff000000 ) |
((x << 8) & 0x00ff0000 ) |
((x >> 8) & 0x0000ff00 ) |
((x >> 24) & 0x000000ff );}
#endif
//**************************************
// Constants
//**************************************
#define PRIME32_1 2654435761U
#define PRIME32_2 2246822519U
#define PRIME32_3 3266489917U
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U
//**************************************
// Architecture Macros
//**************************************
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
static const int one = 1;
# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one))
#endif
//**************************************
// Macros
//**************************************
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
//****************************
// Memory reads
//****************************
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
// Reads a 32-bit little-endian value. 'align' chooses between a direct
// dereference (caller guarantees alignment) and the packed-struct A32()
// access that tolerates unaligned pointers; 'endian' adds a byte swap on
// big-endian hosts so the result is always the little-endian interpretation.
forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
{
	if (align==XXH_unaligned)
		return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
	else
		return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
}

// Convenience form: unaligned-safe little-endian read.
forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
//****************************
// Simple Hash Functions
//****************************
// Core one-shot xxHash32: four interleaved accumulators over 16-byte stripes,
// then a 4-byte tail loop, a byte tail loop, and a final avalanche.
// 'endian'/'align' are compile-time constants at each call site, so the
// branches in the read helper fold away. Do not alter any arithmetic here -
// every constant and rotation is part of the hash definition.
forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
	const BYTE* p = (const BYTE*)input;
	const BYTE* const bEnd = p + len;
	U32 h32;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
	// Treat NULL as a zero-length input (p is set to a dummy non-NULL value).
	if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
#endif

	if (len>=16)
	{
		const BYTE* const limit = bEnd - 16;
		U32 v1 = seed + PRIME32_1 + PRIME32_2;
		U32 v2 = seed + PRIME32_2;
		U32 v3 = seed + 0;
		U32 v4 = seed - PRIME32_1;

		do
		{
			v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
			v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
			v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
			v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
		} while (p<=limit);

		// Merge the four lanes.
		h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
	}
	else
	{
		h32 = seed + PRIME32_5;
	}

	h32 += (U32) len;

	// Remaining 4-byte words.
	while (p<=bEnd-4)
	{
		h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
		h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
		p+=4;
	}

	// Remaining bytes.
	while (p<bEnd)
	{
		h32 += (*p) * PRIME32_5;
		h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
		p++;
	}

	// Final avalanche: mix high bits down.
	h32 ^= h32 >> 15;
	h32 *= PRIME32_2;
	h32 ^= h32 >> 13;
	h32 *= PRIME32_3;
	h32 ^= h32 >> 16;

	return h32;
}
// One-shot xxHash32 of 'len' bytes with the given seed.
// Fix: the alignment fast-path condition was inverted - `if ((((size_t)input) & 3))`
// is true when the pointer is NOT 4-byte aligned, yet it selected the
// XXH_aligned path (direct dereference), contradicting its own comment and
// risking alignment faults on strict-alignment CPUs. Upstream xxHash uses
// `== 0`; the hash value itself is unchanged either way.
U32 XXH32(const void* input, int len, U32 seed)
{
#if 0
	// Simple version, good for code maintenance, but unfortunately slow for small inputs
	void* state = XXH32_init(seed);
	XXH32_update(state, input, len);
	return XXH32_digest(state);
#else
	XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USE_UNALIGNED_ACCESS)
	if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
	{
		if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
			return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
		else
			return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
	}
#  endif

	if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
		return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
	else
		return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
//****************************
// Advanced Hash Functions
//****************************
struct XXH_state32_t
{
U64 total_len;
U32 seed;
U32 v1;
U32 v2;
U32 v3;
U32 v4;
int memsize;
char memory[16];
};
// Size of the streaming state, so callers can allocate it without seeing the
// struct definition.
int XXH32_sizeofState()
{
	XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   // A compilation error here means XXH32_SIZEOFSTATE is not large enough
	return sizeof(struct XXH_state32_t);
}
// (Re)initializes a streaming state for a new hash with the given seed.
// The four lane accumulators mirror XXH32_endian_align's initial values.
XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
{
	struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
	state->seed = seed;
	state->v1 = seed + PRIME32_1 + PRIME32_2;
	state->v2 = seed + PRIME32_2;
	state->v3 = seed + 0;
	state->v4 = seed - PRIME32_1;
	state->total_len = 0;
	state->memsize = 0;   // no buffered tail bytes yet
	return XXH_OK;
}
// Allocates and initializes a streaming state. Returns NULL on allocation
// failure (callers already must treat the return as opaque/possibly NULL).
// Fix: the original passed the malloc result straight to XXH32_resetState,
// dereferencing NULL on out-of-memory.
void* XXH32_init (U32 seed)
{
	void* state = XXH_malloc (sizeof(struct XXH_state32_t));
	if (state != NULL)
		XXH32_resetState(state, seed);
	return state;
}
// Streaming update: consumes 'len' bytes, buffering up to 15 tail bytes in
// state->memory between calls so lane processing always sees whole 16-byte
// stripes. Order matters: drain the carry-over buffer before the bulk loop.
forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
{
	struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
	const BYTE* p = (const BYTE*)input;
	const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
	if (input==NULL) return XXH_ERROR;
#endif

	state->total_len += len;

	if (state->memsize + len < 16)   // fill in tmp buffer
	{
		// Not enough for a full stripe yet - just accumulate and return.
		XXH_memcpy(state->memory + state->memsize, input, len);
		state->memsize += len;
		return XXH_OK;
	}

	if (state->memsize)   // some data left from previous update
	{
		// Complete the buffered stripe with the first bytes of this input,
		// then run it through all four lanes.
		XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
		{
			const U32* p32 = (const U32*)state->memory;
			state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
			state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
			state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
			state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
		}
		p += 16-state->memsize;
		state->memsize = 0;
	}

	if (p <= bEnd-16)
	{
		// Bulk loop over whole 16-byte stripes, using local copies of the
		// lane accumulators so the compiler can keep them in registers.
		const BYTE* const limit = bEnd - 16;
		U32 v1 = state->v1;
		U32 v2 = state->v2;
		U32 v3 = state->v3;
		U32 v4 = state->v4;

		do
		{
			v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
			v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
			v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
			v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
		} while (p<=limit);

		state->v1 = v1;
		state->v2 = v2;
		state->v3 = v3;
		state->v4 = v4;
	}

	if (p < bEnd)
	{
		// Stash the <16 remaining bytes for the next update/digest.
		XXH_memcpy(state->memory, p, bEnd-p);
		state->memsize = (int)(bEnd-p);
	}

	return XXH_OK;
}
// Public streaming update: detects CPU endianness and dispatches to the
// matching inlined variant of XXH32_update_endian().
XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    if ((endian_detected != XXH_littleEndian) && !XXH_FORCE_NATIVE_FORMAT)
        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
    return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
}
// Finalization core: folds the current streaming state into a 32-bit hash
// WITHOUT modifying it, so more data can still be fed afterwards.
forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
{
    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
    const BYTE * p = (const BYTE*)state->memory;
    BYTE* bEnd = (BYTE*)state->memory + state->memsize;
    U32 h32;
    if (state->total_len >= 16)
    {
        // At least one full stripe was processed: merge the 4 lanes.
        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    }
    else
    {
        // Short input: the lanes were never used, start from the seed.
        h32 = state->seed + PRIME32_5;
    }
    h32 += (U32) state->total_len;
    // Consume the buffered tail: 4 bytes at a time, then byte by byte.
    // NOTE(review): when memsize < 4, bEnd-4 forms a pointer before the
    // buffer (comparison only, never dereferenced) — inherited from upstream.
    while (p<=bEnd-4)
    {
        h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
        h32 = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
    }
    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }
    // Final avalanche: xor-shift/multiply mix so every input bit can affect
    // every output bit.
    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;
    return h32;
}
// Produces the current 32-bit hash while keeping the state alive, so hashing
// can continue with further XXH32_update() calls. Dispatches on endianness.
U32 XXH32_intermediateDigest (void* state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    if ((endian_detected != XXH_littleEndian) && !XXH_FORCE_NATIVE_FORMAT)
        return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
    return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
}
// Final digest: computes the hash, then releases the state allocated by
// XXH32_init(). The state pointer must not be used after this call.
U32 XXH32_digest (void* state_in)
{
    const U32 result = XXH32_intermediateDigest(state_in);
    XXH_free(state_in);
    return result;
}

164
ext/xxhash.h Normal file
View file

@ -0,0 +1,164 @@
/*
xxHash - Fast Hash algorithm
Header File
Copyright (C) 2012-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
*/
/* Notice extracted from xxHash homepage :
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
SuperFastHash 1.2 GB/s 1 Paul Hsieh
CityHash64 1.05 GB/s 10 Pike & Alakuijala
FNV 0.55 GB/s 5 Fowler, Noll, Vo
CRC32 0.43 GB/s 9
MD5-32 0.33 GB/s 10 Ronald L. Rivest
SHA1-32 0.28 GB/s 10
Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
//****************************
// Type
//****************************
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
//****************************
// Simple Hash Functions
//****************************
unsigned int XXH32 (const void* input, int len, unsigned int seed);
/*
XXH32() :
Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
The memory between input & input+len must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
This function successfully passes all SMHasher tests.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
Note that "len" is type "int", which means it is limited to 2^31-1.
If your data is larger, use the advanced functions below.
*/
//****************************
// Advanced Hash Functions
//****************************
void* XXH32_init (unsigned int seed);
XXH_errorcode XXH32_update (void* state, const void* input, int len);
unsigned int XXH32_digest (void* state);
/*
These functions calculate the xxhash of an input provided in several small packets,
as opposed to an input provided as a single block.
It must be started with :
void* XXH32_init()
The function returns a pointer which holds the state of calculation.
This pointer must be provided as "void* state" parameter for XXH32_update().
XXH32_update() can be called as many times as necessary.
The user must provide a valid (allocated) input.
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
Note that "len" is type "int", which means it is limited to 2^31-1.
If your data is larger, it is recommended to chunk your data into blocks
of size for example 2^30 (1GB) to avoid any "int" overflow issue.
Finally, you can end the calculation anytime, by using XXH32_digest().
This function returns the final 32-bits hash.
You must provide the same "void* state" parameter created by XXH32_init().
Memory will be freed by XXH32_digest().
*/
int XXH32_sizeofState();
XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
#define XXH32_SIZEOFSTATE 48
typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
/*
These functions allow user application to make its own allocation for state.
XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
*/
unsigned int XXH32_intermediateDigest (void* state);
/*
This function does the same as XXH32_digest(), generating a 32-bit hash,
but preserves the memory context.
This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
To free memory context, use XXH32_digest(), or free().
*/
//****************************
// Deprecated function names
//****************************
// The following translations are provided to ease code transition
// You are encouraged to no longer use these function names
#define XXH32_feed XXH32_update
#define XXH32_result XXH32_digest
#define XXH32_getIntermediateResult XXH32_intermediateDigest
#if defined (__cplusplus)
}
#endif