Merge pull request #16123 from unknownbrackets/gpu-matrix

softgpu: Correct matrix value update wrapping
This commit is contained in:
Henrik Rydgård 2022-09-28 09:39:27 +02:00 committed by GitHub
commit 30c7b45ac8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 268 additions and 121 deletions

View file

@ -518,49 +518,19 @@ u32 sceGeRestoreContext(u32 ctxAddr) {
return 0;
}
// Writes a matrix to guest memory starting at matrixPtr.
// `size` is the matrix size in bytes; each float in `mtx` is converted with
// toFloat24() and stored as one 32-bit word.
static void __GeCopyMatrix(u32 matrixPtr, float *mtx, u32 size) {
	const u32 entries = size / sizeof(float);
	u32 idx = 0;
	while (idx < entries) {
		const u32 packed = toFloat24(mtx[idx]);
		Memory::Write_U32(packed, matrixPtr + idx * sizeof(float));
		++idx;
	}
}
static int sceGeGetMtx(int type, u32 matrixPtr) {
if (!Memory::IsValidAddress(matrixPtr)) {
ERROR_LOG(SCEGE, "sceGeGetMtx(%d, %08x) - bad matrix ptr", type, matrixPtr);
return -1;
int size = type == GE_MTX_PROJECTION ? 16 : 12;
if (!Memory::IsValidRange(matrixPtr, size * sizeof(float))) {
return hleLogError(SCEGE, -1, "bad matrix ptr");
}
INFO_LOG(SCEGE, "sceGeGetMtx(%d, %08x)", type, matrixPtr);
switch (type) {
case GE_MTX_BONE0:
case GE_MTX_BONE1:
case GE_MTX_BONE2:
case GE_MTX_BONE3:
case GE_MTX_BONE4:
case GE_MTX_BONE5:
case GE_MTX_BONE6:
case GE_MTX_BONE7:
{
int n = type - GE_MTX_BONE0;
__GeCopyMatrix(matrixPtr, gstate.boneMatrix + n * 12, 12 * sizeof(float));
}
break;
case GE_MTX_TEXGEN:
__GeCopyMatrix(matrixPtr, gstate.tgenMatrix, 12 * sizeof(float));
break;
case GE_MTX_WORLD:
__GeCopyMatrix(matrixPtr, gstate.worldMatrix, 12 * sizeof(float));
break;
case GE_MTX_VIEW:
__GeCopyMatrix(matrixPtr, gstate.viewMatrix, 12 * sizeof(float));
break;
case GE_MTX_PROJECTION:
__GeCopyMatrix(matrixPtr, gstate.projMatrix, 16 * sizeof(float));
break;
default:
return SCE_KERNEL_ERROR_INVALID_INDEX;
}
return 0;
u32_le *dest = (u32_le *)Memory::GetPointerWriteUnchecked(matrixPtr);
// Note: this reads the CPU-visible matrix values, which may differ from the actual used values.
// They only differ when more DATA commands are sent than are valid for a matrix.
if (!gpu || !gpu->GetMatrix24(GEMatrixType(type), dest, 0))
return hleLogError(SCEGE, SCE_KERNEL_ERROR_INVALID_INDEX, "invalid matrix");
return hleLogSuccessInfoI(SCEGE, 0);
}
static u32 sceGeGetCmd(int cmd) {
@ -631,7 +601,7 @@ const HLEFunction sceGe_user[] = {
{0X1F6752AD, &WrapU_V<sceGeEdramGetSize>, "sceGeEdramGetSize", 'x', "" },
{0XB77905EA, &WrapU_I<sceGeEdramSetAddrTranslation>, "sceGeEdramSetAddrTranslation", 'x', "i" },
{0XDC93CFEF, &WrapU_I<sceGeGetCmd>, "sceGeGetCmd", 'x', "i" },
{0X57C8945B, &WrapI_IU<sceGeGetMtx>, "sceGeGetMtx", 'i', "ix" },
{0X57C8945B, &WrapI_IU<sceGeGetMtx>, "sceGeGetMtx", 'i', "ip" },
{0X438A385A, &WrapU_U<sceGeSaveContext>, "sceGeSaveContext", 'x', "x" },
{0X0BF608FB, &WrapU_U<sceGeRestoreContext>, "sceGeRestoreContext", 'x', "x" },
{0X5FB86AB0, &WrapI_U<sceGeListDeQueue>, "sceGeListDeQueue", 'i', "x" },

View file

@ -425,6 +425,7 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
UpdateCmdInfo();
UpdateVsyncInterval(true);
ResetMatrices();
PPGeSetDrawContext(draw);
}
@ -731,6 +732,56 @@ int GPUCommon::GetStack(int index, u32 stackPtr) {
return currentList->stackptr;
}
// Converts `count` floats from `mtx` with toFloat24() and stores each value,
// OR'd with `cmdbits`, into consecutive entries of `result`.
static void CopyMatrix24(u32_le *result, const float *mtx, u32 count, u32 cmdbits) {
	const float *const srcEnd = mtx + count;
	u32_le *dst = result;
	for (const float *src = mtx; src != srcEnd; ++src, ++dst) {
		*dst = toFloat24(*src) | cmdbits;
	}
}
// Fills `result` with the requested matrix as 24-bit float values, each OR'd
// with `cmdbits`. Writes 16 entries for the projection matrix and 12 for all
// others. Returns false (writing nothing) when `type` is not a known matrix.
bool GPUCommon::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
	const float *source = nullptr;
	u32 entries = 12;

	switch (type) {
	case GE_MTX_BONE0:
	case GE_MTX_BONE1:
	case GE_MTX_BONE2:
	case GE_MTX_BONE3:
	case GE_MTX_BONE4:
	case GE_MTX_BONE5:
	case GE_MTX_BONE6:
	case GE_MTX_BONE7:
		// Bone matrices are stored back to back, 12 entries each.
		source = gstate.boneMatrix + (type - GE_MTX_BONE0) * 12;
		break;
	case GE_MTX_TEXGEN:
		source = gstate.tgenMatrix;
		break;
	case GE_MTX_WORLD:
		source = gstate.worldMatrix;
		break;
	case GE_MTX_VIEW:
		source = gstate.viewMatrix;
		break;
	case GE_MTX_PROJECTION:
		source = gstate.projMatrix;
		entries = 16;
		break;
	default:
		return false;
	}

	CopyMatrix24(result, source, entries, cmdbits);
	return true;
}
void GPUCommon::ResetMatrices() {
// This means we restored a context, so update the visible matrix data.
for (size_t i = 0; i < ARRAY_SIZE(gstate.boneMatrix); ++i)
matrixVisible.bone[i] = toFloat24(gstate.boneMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.worldMatrix); ++i)
matrixVisible.world[i] = toFloat24(gstate.worldMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.viewMatrix); ++i)
matrixVisible.view[i] = toFloat24(gstate.viewMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.projMatrix); ++i)
matrixVisible.proj[i] = toFloat24(gstate.projMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.tgenMatrix); ++i)
matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);
}
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
// TODO Check the stack values in missing arg and adjust the stack depth
@ -1353,7 +1404,7 @@ void GPUCommon::DoExecuteCall(u32 target) {
// Check for the end
if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
(gstate.boneMatrixNumber & 0x7F) <= 96 - 12) {
(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
// Yep, pretty sure this is a bone matrix call. Double check stall first.
if (target > currentList->stall || target + 12 * 4 < currentList->stall) {
FastLoadBoneMatrix(target);
@ -1887,7 +1938,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
(target > currentList->stall || target + 12 * 4 < currentList->stall) &&
(gstate.boneMatrixNumber & 0x7F) <= 96 - 12) {
(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
FastLoadBoneMatrix(target);
} else {
goto bail;
@ -2131,7 +2182,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
int i = 0;
// We must record the individual data commands while debugRecording_.
bool fastLoad = !debugRecording_;
bool fastLoad = !debugRecording_ && end > 0;
// Stalling in the middle of a matrix would be stupid, I doubt this check is necessary.
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
@ -2152,7 +2203,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2161,7 +2212,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.worldmtxnum & 0xF;
int num = gstate.worldmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
Flush();
@ -2169,7 +2220,7 @@ void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_WORLDMATRIX);
}
num++;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;
}
@ -2180,7 +2231,7 @@ void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
const int end = 12 - (op & 0xF);
int i = 0;
bool fastLoad = !debugRecording_;
bool fastLoad = !debugRecording_ && end > 0;
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
}
@ -2200,7 +2251,7 @@ void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2209,7 +2260,7 @@ void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.viewmtxnum & 0xF;
int num = gstate.viewmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
Flush();
@ -2217,7 +2268,7 @@ void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_VIEWMATRIX);
}
num++;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;
}
@ -2248,7 +2299,7 @@ void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0x1F);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2257,16 +2308,16 @@ void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_ProjMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.projmtxnum & 0x1F; // NOTE: Changed from 0xF to catch overflows
int num = gstate.projmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 0x10 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
if (num < 16 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
Flush();
((u32 *)gstate.projMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
num++;
if (num <= 16)
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;
}
@ -2277,7 +2328,7 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
const int end = 12 - (op & 0xF);
int i = 0;
bool fastLoad = !debugRecording_;
bool fastLoad = !debugRecording_ && end > 0;
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
}
@ -2297,7 +2348,7 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2306,7 +2357,7 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.texmtxnum & 0xF;
int num = gstate.texmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
Flush();
@ -2314,7 +2365,7 @@ void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE); // We check the matrix to see if we need projection
}
num++;
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;
}
@ -2364,7 +2415,7 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op & 0x7F) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2373,7 +2424,7 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.boneMatrixNumber & 0x7F;
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
// Bone matrices should NOT flush when software skinning is enabled!
@ -2386,7 +2437,7 @@ void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
((u32 *)gstate.boneMatrix)[num] = newVal;
}
num++;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
@ -2625,7 +2676,7 @@ struct DisplayList_v2 {
};
void GPUCommon::DoState(PointerWrap &p) {
auto s = p.Section("GPUCommon", 1, 4);
auto s = p.Section("GPUCommon", 1, 5);
if (!s)
return;
@ -2697,6 +2748,10 @@ void GPUCommon::DoState(PointerWrap &p) {
Do(p, isbreak);
Do(p, drawCompleteTicks);
Do(p, busyTicks);
if (s >= 5) {
Do(p, matrixVisible.all);
}
}
void GPUCommon::InterruptStart(int listid) {

View file

@ -111,6 +111,8 @@ public:
int ListSync(int listid, int mode) override;
u32 DrawSync(int mode) override;
int GetStack(int index, u32 stackPtr) override;
bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) override;
void ResetMatrices() override;
void DoState(PointerWrap &p) override;
bool BusyDrawing() override;
u32 Continue() override;
@ -365,6 +367,21 @@ protected:
uint32_t immFlags_ = 0;
bool immFirstSent_ = false;
// When matrix data overflows, the CPU visible values wrap and bleed between matrices.
// But this doesn't actually change the values used by rendering.
// The CPU visible values affect the GPU when list contexts are restored.
// Note: not maintained by all backends, here for save stating.
//
// The named arrays and `all` are members of the same union, so `all` addresses
// the same words as one flat array in declaration order: bone, world, view,
// proj, tgen.
union {
struct {
// Eight bone matrices, 12 entries each.
u32 bone[12 * 8];
u32 world[12];
u32 view[12];
// The only 16-entry matrix; the others have 12.
u32 proj[16];
u32 tgen[12];
};
// Flat view over all of the above (sizes summed in the same order).
u32 all[12 * 8 + 12 + 12 + 16 + 12];
} matrixVisible;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View file

@ -198,6 +198,8 @@ public:
virtual u32 Continue() = 0;
virtual u32 Break(int mode) = 0;
virtual int GetStack(int index, u32 stackPtr) = 0;
virtual bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) = 0;
virtual void ResetMatrices() = 0;
virtual void InterruptStart(int listid) = 0;
virtual void InterruptEnd(int listid) = 0;

View file

@ -24,6 +24,7 @@
#include "Core/System.h"
#include "Core/MemMap.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#ifdef _M_SSE
@ -85,11 +86,20 @@ static const CmdRange contextCmdRanges[] = {
// Skip: {0xFA, 0xFF},
};
static u32_le *SaveMatrix(u32_le *cmds, const float *mtx, int sz, int numcmd, int datacmd) {
static u32_le *SaveMatrix(u32_le *cmds, GEMatrixType type, int sz, int numcmd, int datacmd) {
if (!gpu)
return cmds;
*cmds++ = numcmd << 24;
for (int i = 0; i < sz; ++i) {
*cmds++ = (datacmd << 24) | toFloat24(mtx[i]);
// This saves the CPU-visible values, not the actual used ones, which may differ.
// Note that Restore overwrites both values.
if (type == GE_MTX_BONE0) {
for (int i = 0; i < 8; ++i)
gpu->GetMatrix24(GEMatrixType(GE_MTX_BONE0 + i), cmds + i * 12, datacmd << 24);
} else {
gpu->GetMatrix24(type, cmds, datacmd << 24);
}
cmds += sz;
return cmds;
}
@ -117,6 +127,9 @@ void GPUgstate::Reset() {
memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));
if (gpu)
gpu->ResetMatrices();
savedContextVersion = 1;
}
@ -152,11 +165,11 @@ void GPUgstate::Save(u32_le *ptr) {
memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
} else {
cmds = SaveMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
cmds = SaveMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
cmds = SaveMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
cmds = SaveMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
cmds = SaveMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_BONE0, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_WORLD, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_VIEW, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_PROJECTION, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_TEXGEN, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);
*cmds++ = boneMatrixNumber;
*cmds++ = worldmtxnum;
@ -199,7 +212,7 @@ void GPUgstate::FastLoadBoneMatrix(u32 addr) {
#endif
num += 12;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
}
void GPUgstate::Restore(u32_le *ptr) {
@ -244,6 +257,9 @@ void GPUgstate::Restore(u32_le *ptr) {
projmtxnum = *cmds++;
texmtxnum = *cmds++;
}
if (gpu)
gpu->ResetMatrices();
}
bool vertTypeIsSkinningEnabled(u32 vertType) {

View file

@ -341,16 +341,16 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_DITH2, 0, SoftDirty::PIXEL_DITHER },
{ GE_CMD_DITH3, 0, SoftDirty::PIXEL_DITHER },
{ GE_CMD_WORLDMATRIXNUMBER },
{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_WorldMtxNum },
{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_WorldMtxData },
{ GE_CMD_VIEWMATRIXNUMBER },
{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ViewMtxNum },
{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ViewMtxData },
{ GE_CMD_PROJMATRIXNUMBER },
{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ProjMtxNum },
{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ProjMtxData },
// Currently not state.
{ GE_CMD_TGENMATRIXNUMBER },
{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_TgenMtxNum },
{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_TgenMtxData },
{ GE_CMD_BONEMATRIXNUMBER },
{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoneMtxNum },
{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoneMtxData },
// Vertex Screen/Texture/Color
@ -1040,83 +1040,162 @@ void SoftGPU::Execute_VertexType(u32 op, u32 diff) {
}
}
// Stores the world matrix write index, keeping only the low four bits of `op`.
void SoftGPU::Execute_WorldMtxNum(u32 op, u32 diff) {
	// Setting 0xFFFFF0 will reset to 0.
	const u32 index = op & 0xF;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | index;
}
// Stores the view matrix write index, keeping only the low four bits of `op`.
void SoftGPU::Execute_ViewMtxNum(u32 op, u32 diff) {
	const u32 index = op & 0xF;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | index;
}
// Stores the projection matrix write index, keeping only the low four bits of `op`.
void SoftGPU::Execute_ProjMtxNum(u32 op, u32 diff) {
	const u32 index = op & 0xF;
	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | index;
}
// Stores the texgen matrix write index, keeping only the low four bits of `op`.
void SoftGPU::Execute_TgenMtxNum(u32 op, u32 diff) {
	const u32 index = op & 0xF;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | index;
}
// Stores the bone matrix write index, keeping only the low seven bits of `op`.
void SoftGPU::Execute_BoneMtxNum(u32 op, u32 diff) {
	// Bits set outside 0x7F are ignored, and setting them resets the internal counter.
	const u32 index = op & 0x7F;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | index;
}
void SoftGPU::Execute_WorldMtxData(u32 op, u32 diff) {
int num = gstate.worldmtxnum & 0xF;
u32 *target = num < 12 ? (u32 *)&gstate.worldMatrix[num] : (u32 *)&gstate.viewMatrix[num - 12];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
int num = gstate.worldmtxnum & 0x00FFFFFF;
if (num < 12) {
u32 *target = (u32 *)&gstate.worldMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[12 * 8 + (num & 0xF)];
*target = op & 0x00FFFFFF;
num++;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;
}
void SoftGPU::Execute_ViewMtxData(u32 op, u32 diff) {
int num = gstate.viewmtxnum & 0xF;
u32 *target = num < 12 ? (u32 *)&gstate.viewMatrix[num] : (u32 *)&gstate.projMatrix[num - 12];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
int num = gstate.viewmtxnum & 0x00FFFFFF;
if (num < 12) {
u32 *target = (u32 *)&gstate.viewMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[12 * 8 + 12 + (num & 0xF)];
*target = op & 0x00FFFFFF;
num++;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;
}
void SoftGPU::Execute_ProjMtxData(u32 op, u32 diff) {
int num = gstate.projmtxnum & 0xF;
u32 *target = (u32 *)&gstate.projMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
int num = gstate.projmtxnum & 0x00FFFFFF;
if (num < 16) {
u32 *target = (u32 *)&gstate.projMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[12 * 8 + 12 + 12 + (num & 0xF)];
*target = op & 0x00FFFFFF;
num++;
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;
}
void SoftGPU::Execute_TgenMtxData(u32 op, u32 diff) {
int num = gstate.texmtxnum & 0xF;
u32 newVal = op << 8;
// Doesn't wrap to any other matrix.
if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
((u32 *)gstate.tgenMatrix)[num] = newVal;
int num = gstate.texmtxnum & 0x00FFFFFF;
if (num < 12) {
u32 *target = (u32 *)&gstate.tgenMatrix[num];
u32 newVal = op << 8;
// No dirtying, read during vertex read.
*target = newVal;
}
// Doesn't wrap to any other matrix.
if ((num & 0xF) < 12) {
matrixVisible.tgen[num & 0xF] = op & 0x00FFFFFF;
}
num++;
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;
}
void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) {
int num = gstate.boneMatrixNumber & 0x7F;
u32 *target;
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
if (num < 96) {
target = (u32 *)&gstate.boneMatrix[num];
} else if (num < 96 + 12) {
target = (u32 *)&gstate.worldMatrix[num - 96];
} else if (num < 96 + 12 + 12) {
target = (u32 *)&gstate.viewMatrix[num - 96 - 12];
} else {
target = (u32 *)&gstate.projMatrix[num - 96 - 12 - 12];
u32 *target = (u32 *)&gstate.boneMatrix[num];
u32 newVal = op << 8;
// No dirtying, we read bone data during vertex read.
*target = newVal;
}
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
// Dirty if it overflowed. We read bone data during vertex read.
if (num >= 96)
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[(num & 0x7F)];
*target = op & 0x00FFFFFF;
num++;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
// Copies `count` words from `mtx` into `result`, OR'ing `cmdbits` into each.
// Unlike a float source, the values are stored as-is with no conversion.
static void CopyMatrix24(u32_le *result, const u32 *mtx, u32 count, u32 cmdbits) {
	const u32 *const srcEnd = mtx + count;
	u32_le *dst = result;
	for (const u32 *src = mtx; src != srcEnd; ++src, ++dst) {
		*dst = *src | cmdbits;
	}
}
// Fills `result` from the matrixVisible copy of the requested matrix, with
// `cmdbits` OR'd into every word. Writes 16 entries for the projection matrix
// and 12 for all others. Returns false (writing nothing) for an unknown `type`.
bool SoftGPU::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
	const u32 *source = nullptr;
	u32 entries = 12;

	switch (type) {
	case GE_MTX_BONE0:
	case GE_MTX_BONE1:
	case GE_MTX_BONE2:
	case GE_MTX_BONE3:
	case GE_MTX_BONE4:
	case GE_MTX_BONE5:
	case GE_MTX_BONE6:
	case GE_MTX_BONE7:
		// Bone matrices are stored back to back, 12 entries each.
		source = matrixVisible.bone + (type - GE_MTX_BONE0) * 12;
		break;
	case GE_MTX_TEXGEN:
		source = matrixVisible.tgen;
		break;
	case GE_MTX_WORLD:
		source = matrixVisible.world;
		break;
	case GE_MTX_VIEW:
		source = matrixVisible.view;
		break;
	case GE_MTX_PROJECTION:
		source = matrixVisible.proj;
		entries = 16;
		break;
	default:
		return false;
	}

	CopyMatrix24(result, source, entries, cmdbits);
	return true;
}
void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);
// We won't flush as often as hardware renderers, so we want to flush right away.

View file

@ -181,12 +181,20 @@ public:
// Overridden to change flushing behavior.
void Execute_Call(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) override;
void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);
typedef void (SoftGPU::*CmdFunc)(u32 op, u32 diff);