Try to optimize inline matrix loads.

Improves performance by a few percent in Gods Eater Burst.
This commit is contained in:
Unknown W. Brackets 2014-03-04 00:10:27 -08:00
parent 3ebf47bbd5
commit eb04031975

View file

@ -1274,7 +1274,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
if (((data >> 16) & 0xFF) != 0xFF && (data & 7) > 1)
WARN_LOG_REPORT_ONCE(alphatestmask, G3D, "Unsupported alphatest mask: %02x", (data >> 16) & 0xFF);
#endif
// Intentional fallthrough. (?)
// Intentional fallthrough - we still need to dirty DIRTY_ALPHACOLORREF for GE_CMD_ALPHATEST.
case GE_CMD_COLORREF:
if (diff)
@ -1398,11 +1398,43 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_BONEMATRIXNUMBER:
gstate.boneMatrixNumber &= 0xFF00007F;
{
if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
Flush();
}
// This is almost always followed by GE_CMD_BONEMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4);
u32 *dst = (u32 *)(gstate.boneMatrix + (data & 0x7F));
const int end = 12 * 8 - (data & 0x7F);
int i = 0;
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
dst[i] = src[i] << 8;
if (++i > end) {
break;
}
}
const int count = i;
// Now let's dirty the flags for the loaded matrices.
if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
const int numPlusCount = (data & 0x7F) + count;
for (int num = data & 0x7F; num < numPlusCount; num += 12) {
shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
}
}
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((data + count) & 0x7F);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
currentList->pc += count * 4;
}
break;
case GE_CMD_BONEMATRIXDATA:
{
// Note: it's uncommon to get here now, see above.
int num = gstate.boneMatrixNumber & 0x7F;
u32 newVal = data << 8;
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {