// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#pragma once

#include "Common/x64Emitter.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSVFPUUtils.h"

#undef MAP_NOINIT

// GPRs are numbered 0 to 31.
// VFPU regs are numbered 32 to 159.
// Then we have some temp regs for VFPU handling, numbered 160 to 175.

// Temp regs: 4 from S prefix, 4 from T prefix, 4 from D mask, and 4 for work (worst case).
// But most of the time prefixes aren't used that heavily, so we won't use all of them.

// PLANS FOR PROPER SIMD:
// 1-, 2-, 3-, and 4-vectors will be loaded into single XMM registers.
// Matrices will be loaded into pairs, triads, or quads of XMM registers - simply by loading
// the columns or the rows one by one.
// On x86 this means that only one 4x4 matrix can be fully loaded at once, but that's alright.
// We might want to keep "linearized" columns in memory.

// Implement optimized vec/matrix multiplications of all types and transposes that
// take into account which XMM registers the values are in. Fallback: just dump out the
// values and do it the old way.

#include "ppsspp_config.h"

enum {
	TEMP0 = 32 + 128,
	NUM_MIPS_FPRS = 32 + 128 + NUM_X86_FPU_TEMPS,
};

#if PPSSPP_ARCH(AMD64)
#define NUM_X_FPREGS 16
#elif PPSSPP_ARCH(X86)
#define NUM_X_FPREGS 8
#endif

namespace MIPSAnalyst {
	struct AnalysisResults;
};

struct X64CachedFPReg {
	union {
		int mipsReg;
		int mipsRegs[4];
	};
	bool dirty;
};

struct MIPSCachedFPReg {
	Gen::OpArg location;
	int lane;
	bool away;  // value is not in its default (memory) location
	u8 locked;
	// Only for temp regs.
	bool tempLocked;
};

struct FPURegCacheState {
	MIPSCachedFPReg regs[NUM_MIPS_FPRS];
	X64CachedFPReg xregs[NUM_X_FPREGS];
};

namespace MIPSComp {
	struct JitOptions;
	struct JitState;
}

enum {
	MAP_DIRTY = 1,
	MAP_NOINIT = 2 | MAP_DIRTY,
	// Only for MapRegsV, MapRegsVS.
	MAP_NOLOCK = 4,
};
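// A quick illustration of how these flags combine (the caller and register
// numbers below are hypothetical, not code from this file). A destination
// that will be fully overwritten doesn't need its old value loaded, so it can
// be mapped with MAP_NOINIT, which as defined above already implies MAP_DIRTY:
//
//   fpr.MapRegV(sreg, 0);             // read-only mapping, loads the value
//   fpr.MapRegV(dreg, MAP_DIRTY);     // load old value, then modify in place
//   fpr.MapRegV(dreg, MAP_NOINIT);    // skip the load entirely, mark dirty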
// The PSP has 160 FP registers: 32 FPRs + 128 VFPU registers.
// Soon we will support them all.

class FPURegCache {
public:
	FPURegCache();
	~FPURegCache() {}

	void Start(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo, MIPSAnalyst::AnalysisResults &stats, bool useRip);
	void MapReg(int preg, bool doLoad = true, bool makeDirty = true);
	void StoreFromRegister(int preg);
	void StoreFromRegisterV(int preg) {
		StoreFromRegister(preg + 32);
	}
	Gen::OpArg GetDefaultLocation(int reg) const;
	void DiscardR(int freg);
	void DiscardV(int vreg) {
		DiscardR(vreg + 32);
	}
	void DiscardVS(int vreg);
	bool IsTempX(Gen::X64Reg xreg);
	int GetTempR();
	int GetTempV() {
		return GetTempR() - 32;
	}
	int GetTempVS(u8 *v, VectorSize vsz);

	void SetEmitter(Gen::XEmitter *emitter) { emit = emitter; }

	// Flushes one register and reuses its x86 register for another one. Dirtiness is implied.
	void FlushRemap(int oldreg, int newreg);

	void Flush();
	int SanityCheck() const;

	const Gen::OpArg &R(int freg) const { return regs[freg].location; }
	const Gen::OpArg &V(int vreg) const {
		_dbg_assert_msg_(vregs[vreg].lane == 0, "SIMD reg %d used as V reg (use VS instead). pc=%08x", vreg, mips_->pc);
		return vregs[vreg].location;
	}
	const Gen::OpArg &VS(const u8 *vs) const {
		_dbg_assert_msg_(vregs[vs[0]].lane != 0, "V reg %d used as VS reg (use V instead). pc=%08x", vs[0], mips_->pc);
		return vregs[vs[0]].location;
	}

	Gen::X64Reg RX(int freg) const {
		if (regs[freg].away && regs[freg].location.IsSimpleReg())
			return regs[freg].location.GetSimpleReg();
		_assert_msg_(false, "Not so simple - f%i", freg);
		return (Gen::X64Reg)-1;
	}
	Gen::X64Reg VX(int vreg) const {
		_dbg_assert_msg_(vregs[vreg].lane == 0, "SIMD reg %d used as V reg (use VSX instead). pc=%08x", vreg, mips_->pc);
		if (vregs[vreg].away && vregs[vreg].location.IsSimpleReg())
			return vregs[vreg].location.GetSimpleReg();
		_assert_msg_(false, "Not so simple - v%i", vreg);
		return (Gen::X64Reg)-1;
	}
	Gen::X64Reg VSX(const u8 *vs) const {
		_dbg_assert_msg_(vregs[vs[0]].lane != 0, "V reg %d used as VS reg (use VX instead). pc=%08x", vs[0], mips_->pc);
		if (vregs[vs[0]].away && vregs[vs[0]].location.IsSimpleReg())
			return vregs[vs[0]].location.GetSimpleReg();
		_assert_msg_(false, "Not so simple - v%i", vs[0]);
		return (Gen::X64Reg)-1;
	}

	// Just to avoid coding mistakes: declared but never defined, so passing an
	// X64Reg to R() fails at link time instead of silently converting to int.
	void R(Gen::X64Reg r);

	// Register locking. Prevents registers from being spilled.
	void SpillLock(int p1, int p2 = 0xff, int p3 = 0xff, int p4 = 0xff);
	void ReleaseSpillLock(int mipsreg);
	void ReleaseSpillLocks();

	bool IsMapped(int r) {
		return R(r).IsSimpleReg();
	}
	bool IsMappedV(int v) {
		return vregs[v].lane == 0 && V(v).IsSimpleReg();
	}
	bool IsMappedVS(u8 v) {
		return vregs[v].lane != 0 && VS(&v).IsSimpleReg();
	}
	bool IsMappedVS(const u8 *v, VectorSize vsz);
	bool CanMapVS(const u8 *v, VectorSize vsz);

	void MapRegV(int vreg, int flags);
	void MapRegsV(int vec, VectorSize vsz, int flags);
	void MapRegsV(const u8 *v, VectorSize vsz, int flags);
	void SpillLockV(int vreg) {
		SpillLock(vreg + 32);
	}
	void SpillLockV(const u8 *v, VectorSize vsz);
	void SpillLockV(int vec, VectorSize vsz);
	void ReleaseSpillLockV(int vreg) {
		ReleaseSpillLock(vreg + 32);
	}
	void ReleaseSpillLockV(const u8 *vec, VectorSize sz);

	// TODO: This may trash XMM0/XMM1 some day.
	void MapRegsVS(const u8 *v, VectorSize vsz, int flags);
	bool TryMapRegsVS(const u8 *v, VectorSize vsz, int flags);
	bool TryMapDirtyInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, bool avoidLoad = true);
	bool TryMapDirtyInInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, const u8 *vt, VectorSize vtsz, bool avoidLoad = true);
	// TODO: If s/t overlap differently, we may need read-only copies? Maybe finalize d? Major design flaw...
	// TODO: Matrix versions? Cols/Rows?

	// No MapRegVS, that'd be silly.

	void SimpleRegsV(const u8 *v, VectorSize vsz, int flags);
	void SimpleRegsV(const u8 *v, MatrixSize msz, int flags);
	void SimpleRegV(const u8 v, int flags);

	void GetState(FPURegCacheState &state) const;
	void RestoreState(const FPURegCacheState &state);

	MIPSState *mips_ = nullptr;

	void FlushX(Gen::X64Reg reg);
	Gen::X64Reg GetFreeXReg();
	int GetFreeXRegs(Gen::X64Reg *regs, int n, bool spill = true);

	void Invariant() const;
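	// Sketch of the typical call sequence from a jit op (the surrounding
	// compiler function and the sregs/dregs arrays are hypothetical; the
	// methods are the ones declared above):
	//
	//   fpr.SpillLockV(sregs, sz);             // keep operands resident...
	//   fpr.SpillLockV(dregs, sz);             // ...while we map and emit
	//   fpr.MapRegsV(sregs, sz, 0);            // read-only sources
	//   fpr.MapRegsV(dregs, sz, MAP_NOINIT);   // fully overwritten, skip load
	//   // ... emit SSE code via VX(...)/V(...) on the mapped registers ...
	//   fpr.ReleaseSpillLocks();               // everything spillable again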
private:
	const int *GetAllocationOrder(int &count);
	void SetupInitialRegs();

	// These are intentionally not public so the interface is "locked" or "unlocked", no levels.
	void ReduceSpillLock(int mreg);
	void ReduceSpillLockV(int vreg) {
		ReduceSpillLock(vreg + 32);
	}
	void ReduceSpillLockV(const u8 *vec, VectorSize sz);

	Gen::X64Reg LoadRegsVS(const u8 *v, int n);

	MIPSCachedFPReg regs[NUM_MIPS_FPRS]{};
	X64CachedFPReg xregs[NUM_X_FPREGS]{};
	MIPSCachedFPReg *vregs;

	bool useRip_;
	bool pendingFlush;
	bool initialReady = false;
	MIPSCachedFPReg regsInitial[NUM_MIPS_FPRS];
	X64CachedFPReg xregsInitial[NUM_X_FPREGS];

	Gen::XEmitter *emit = nullptr;
	MIPSComp::JitState *js_;
	MIPSComp::JitOptions *jo_;
};
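// Sketch of how GetState/RestoreState can bracket a conditional path in the
// jit (hypothetical caller; only the two methods and FPURegCacheState come
// from this header). The snapshot lets the compiler roll the cache back so
// both sides of a branch start from the same register state:
//
//   FPURegCacheState state;
//   fpr.GetState(state);        // snapshot before compiling the taken path
//   // ... compile the taken path, which may remap/flush registers ...
//   fpr.RestoreState(state);    // rewind the cache for the fallthrough path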