mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
softgpu: Cache texture bufws as 16-bit values.
This reduces the size of the rasterizer state a bit.
This commit is contained in:
parent
b2e6a086dc
commit
167213c746
9 changed files with 34 additions and 33 deletions
|
@ -242,7 +242,7 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
|
|||
if (!state.enableTextures)
|
||||
return false;
|
||||
|
||||
const int textureBits = textureBitsPerPixel[state.samplerID.texfmt];
|
||||
const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
|
||||
for (int i = 0; i <= state.maxTexLevel; ++i) {
|
||||
int byteStride = (state.texbufw[i] * textureBits) / 8;
|
||||
int byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;
|
||||
|
|
|
@ -116,7 +116,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
|
|||
for (uint8_t i = 0; i <= state->maxTexLevel; i++) {
|
||||
u32 texaddr = gstate.getTextureAddress(i);
|
||||
state->texaddr[i] = texaddr;
|
||||
state->texbufw[i] = GetTextureBufw(i, texaddr, texfmt);
|
||||
state->texbufw[i] = (uint16_t)GetTextureBufw(i, texaddr, texfmt);
|
||||
if (Memory::IsValidAddress(texaddr))
|
||||
state->texptr[i] = Memory::GetPointerUnchecked(texaddr);
|
||||
else
|
||||
|
@ -413,7 +413,7 @@ Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &sourc
|
|||
|
||||
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) {
|
||||
const u8 **tptr0 = const_cast<const u8 **>(&state.texptr[texlevel]);
|
||||
const int *bufw0 = &state.texbufw[texlevel];
|
||||
const uint16_t *bufw0 = &state.texbufw[texlevel];
|
||||
|
||||
if (!bilinear) {
|
||||
return state.nearest(s, t, x, y, prim_color, tptr0, bufw0, texlevel, frac_texlevel, state.samplerID);
|
||||
|
@ -1468,7 +1468,7 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level)
|
|||
|
||||
GETextureFormat texfmt = gstate.getTextureFormat();
|
||||
u32 texaddr = gstate.getTextureAddress(level);
|
||||
int texbufw = GetTextureBufw(level, texaddr, texfmt);
|
||||
u32 texbufw = GetTextureBufw(level, texaddr, texfmt);
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ struct RasterizerState {
|
|||
Sampler::LinearFunc linear;
|
||||
Sampler::NearestFunc nearest;
|
||||
uint32_t texaddr[8]{};
|
||||
int texbufw[8]{};
|
||||
uint16_t texbufw[8]{};
|
||||
const u8 *texptr[8]{};
|
||||
float textureLodSlope;
|
||||
int screenOffsetX;
|
||||
|
|
|
@ -103,7 +103,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
|
|||
const u8 *texptr = state.texptr[0];
|
||||
|
||||
GETextureFormat texfmt = state.samplerID.TexFmt();
|
||||
int texbufw = state.texbufw[0];
|
||||
uint16_t texbufw = state.texbufw[0];
|
||||
|
||||
Sampler::FetchFunc fetchFunc = Sampler::GetFetchFunc(state.samplerID);
|
||||
auto &pixelID = state.pixelID;
|
||||
|
|
|
@ -38,8 +38,8 @@ using namespace Rasterizer;
|
|||
|
||||
namespace Sampler {
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
|
||||
|
||||
std::mutex jitCacheLock;
|
||||
|
@ -281,7 +281,7 @@ struct Nearest4 {
|
|||
};
|
||||
|
||||
template <int N>
|
||||
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, int texbufw, int level, const SamplerID &samplerID) {
|
||||
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, uint16_t texbufw, int level, const SamplerID &samplerID) {
|
||||
Nearest4 res;
|
||||
if (!srcptr) {
|
||||
memset(res.v, 0, sizeof(res.v));
|
||||
|
@ -535,7 +535,7 @@ Vec4IntResult SOFTRAST_CALL GetTextureFunctionOutput(Vec4IntArg prim_color_in, V
|
|||
return ToVec4IntResult(Vec4<int>(out_rgb, out_a));
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID) {
|
||||
int u, v;
|
||||
|
||||
// Nearest filtering only. Round texcoords.
|
||||
|
@ -631,7 +631,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, fl
|
|||
return ApplyTexelClampQuadT(samplerID.clampT, base_v, height);
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const int *bufw, int texlevel, const SamplerID &samplerID) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const uint16_t *bufw, int texlevel, const SamplerID &samplerID) {
|
||||
int frac_u, frac_v;
|
||||
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x, samplerID);
|
||||
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y, samplerID);
|
||||
|
@ -646,7 +646,7 @@ static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, in
|
|||
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) / (16 * 16));
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
|
||||
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel, samplerID);
|
||||
if (levelFrac) {
|
||||
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1, samplerID);
|
||||
|
|
|
@ -36,10 +36,10 @@ namespace Sampler {
|
|||
typedef Rasterizer::Vec4IntResult(SOFTRAST_CALL *FetchFunc)(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
|
||||
FetchFunc GetFetchFunc(SamplerID id);
|
||||
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *NearestFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *NearestFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
NearestFunc GetNearestFunc(SamplerID id);
|
||||
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *LinearFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *LinearFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
LinearFunc GetLinearFunc(SamplerID id);
|
||||
|
||||
void Init();
|
||||
|
|
|
@ -246,7 +246,7 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
|||
auto loadPtrs = [&](bool level1) {
|
||||
X64Reg bufwReg = regCache_.Alloc(RegCache::GEN_ARG_BUFW);
|
||||
X64Reg bufwPtrReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
MOV(32, R(bufwReg), MDisp(bufwPtrReg, level1 ? 4 : 0));
|
||||
MOVZX(32, 16, bufwReg, MDisp(bufwPtrReg, level1 ? 2 : 0));
|
||||
regCache_.Unlock(bufwPtrReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW);
|
||||
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW);
|
||||
|
@ -713,7 +713,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
|||
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR_PTR);
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
ADD(64, R(srcArgReg), MDisp(srcReg, level1 ? 8 : 0));
|
||||
MOV(32, R(bufwArgReg), MDisp(bufwReg, level1 ? 4 : 0));
|
||||
MOVZX(32, 16, bufwArgReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
// Leave level/levelFrac, we just always load from RAM on Windows and lock on POSIX.
|
||||
regCache_.Unlock(srcReg, RegCache::GEN_ARG_TEXPTR_PTR);
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
|
@ -2995,12 +2995,13 @@ bool SamplerJitCache::Jit_PrepareDataDirectOffsets(const SamplerID &id, RegCache
|
|||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
if (cpu_info.bAVX2) {
|
||||
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
} else {
|
||||
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
PXOR(bufwVecReg, R(bufwVecReg));
|
||||
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
|
||||
}
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
|
||||
if (bitsPerTexel == 4)
|
||||
|
@ -3070,12 +3071,13 @@ bool SamplerJitCache::Jit_PrepareDataSwizzledOffsets(const SamplerID &id, RegCac
|
|||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
if (cpu_info.bAVX2) {
|
||||
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
} else {
|
||||
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
PXOR(bufwVecReg, R(bufwVecReg));
|
||||
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
|
||||
}
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
}
|
||||
|
||||
|
@ -3162,12 +3164,13 @@ bool SamplerJitCache::Jit_PrepareDataDXTOffsets(const SamplerID &id, Rasterizer:
|
|||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
if (cpu_info.bAVX2) {
|
||||
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
} else {
|
||||
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
PXOR(bufwVecReg, R(bufwVecReg));
|
||||
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
|
||||
}
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
|
||||
// Divide by 4 before the multiply.
|
||||
|
|
|
@ -374,8 +374,7 @@ enum GEMatrixType {
|
|||
GE_MTX_TEXGEN,
|
||||
};
|
||||
|
||||
enum GEComparison
|
||||
{
|
||||
enum GEComparison : uint8_t {
|
||||
GE_COMP_NEVER = 0,
|
||||
GE_COMP_ALWAYS = 1,
|
||||
GE_COMP_EQUAL = 2,
|
||||
|
@ -578,8 +577,7 @@ enum GEPrimitiveType
|
|||
GE_PRIM_INVALID = -1,
|
||||
};
|
||||
|
||||
enum GELogicOp
|
||||
{
|
||||
enum GELogicOp : uint8_t {
|
||||
GE_LOGIC_CLEAR = 0,
|
||||
GE_LOGIC_AND = 1,
|
||||
GE_LOGIC_AND_REVERSE = 2,
|
||||
|
|
|
@ -48,7 +48,7 @@ static bool TestSamplerJit() {
|
|||
bool header = false;
|
||||
|
||||
u8 **tptr = new u8 *[8];
|
||||
int *bufw = new int[8];
|
||||
uint16_t *bufw = new uint16_t[8];
|
||||
u8 *clut = new u8[1024];
|
||||
memset(clut, 0, 1024);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue