mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Bone matrix loading speedup. Bit of a hack but seems reliable enough.
This commit is contained in:
parent
003cb41db5
commit
8c562a615f
3 changed files with 49 additions and 1 deletions
|
@ -725,9 +725,22 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
|||
case GE_CMD_CALL:
|
||||
{
|
||||
easy_guard guard(listLock);
|
||||
|
||||
// Saint Seiya needs correct support for relative calls.
|
||||
u32 retval = currentList->pc + 4;
|
||||
u32 target = gstate_c.getRelativeAddress(data);
|
||||
|
||||
// Bone matrix optimization - many games will CALL a bone matrix (!).
|
||||
if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
|
||||
// Check for the end
|
||||
if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
|
||||
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET) {
|
||||
// Yep, pretty sure this is a bone matrix call.
|
||||
gstate.FastLoadBoneMatrix(target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
|
||||
ERROR_LOG_REPORT(G3D, "CALL: Stack full!");
|
||||
} else if (!Memory::IsValidAddress(target)) {
|
||||
|
|
|
@ -30,6 +30,10 @@
|
|||
#include "Core/CoreParameter.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/MemMap.h"
|
||||
#ifdef _M_SSE
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
// This must be aligned so that the matrices within are aligned.
|
||||
GPUgstate MEMORY_ALIGNED16(gstate);
|
||||
|
@ -175,6 +179,34 @@ void GPUgstate::Save(u32_le *ptr) {
|
|||
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
|
||||
}
|
||||
|
||||
// Copies 12 consecutive 32-bit words from emulated memory at addr into
// boneMatrix, starting at the slot selected by the low 7 bits of
// boneMatrixNumber, then advances boneMatrixNumber by 12.
//
// Each source word is shifted left by 8 before being stored, which drops the
// top byte of the word and moves the remaining 24 bits into the upper part of
// the destination word.
//
// addr is not validated here (GetPointerUnchecked); the caller is responsible
// for passing an address where at least 12 words are readable.
//
// NOTE(review): there is no bounds check on the destination. The start index
// can be as high as 0x7F and 12 words are always written, so this relies on
// boneMatrix being large enough (or on the caller rejecting high indices) -
// confirm against the declared size of boneMatrix.
void GPUgstate::FastLoadBoneMatrix(u32 addr) {
	const u32 *src = (const u32 *)Memory::GetPointerUnchecked(addr);
	// Only the low 7 bits are used as the index; higher bits of the register
	// are ignored for addressing.
	const u32 num = boneMatrixNumber & 0x7F;
	u32 *dst = (u32 *)(boneMatrix + num);

#ifdef _M_SSE
	// The source has no alignment guarantee, so always use unaligned loads.
	const __m128i row1 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)src), 8);
	const __m128i row2 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(src + 4)), 8);
	const __m128i row3 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(src + 8)), 8);
	if ((num & 0x3) == 0) {
		// Index is a multiple of four words (16 bytes), so the aligned store is
		// used. This presumes boneMatrix itself starts on a 16-byte boundary -
		// TODO confirm against the struct's alignment.
		_mm_store_si128((__m128i *)dst, row1);
		_mm_store_si128((__m128i *)(dst + 4), row2);
		_mm_store_si128((__m128i *)(dst + 8), row3);
	} else {
		_mm_storeu_si128((__m128i *)dst, row1);
		_mm_storeu_si128((__m128i *)(dst + 4), row2);
		_mm_storeu_si128((__m128i *)(dst + 8), row3);
	}
#else
	for (int i = 0; i < 12; i++) {
		dst[i] = src[i] << 8;
	}
#endif

	// Write the advanced index back to THIS object's register (previously this
	// went through the global gstate, which was only correct when called on
	// that global). The index wraps within 7 bits; the command byte is
	// re-inserted in the top 8 bits.
	boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((num + 12) & 0x7F);
}
|
||||
|
||||
void GPUgstate::Restore(u32_le *ptr) {
|
||||
// Not sure what the first 10 values are, exactly, but these seem right.
|
||||
gstate_c.vertexAddr = ptr[5];
|
||||
|
|
|
@ -395,7 +395,10 @@ struct GPUgstate
|
|||
int getTransferHeight() const { return ((transfersize >> 10) & 0x3FF) + 1; }
|
||||
int getTransferBpp() const { return (transferstart & 1) ? 4 : 2; }
|
||||
|
||||
// Real data in the context ends here
|
||||
|
||||
void FastLoadBoneMatrix(u32 addr);
|
||||
|
||||
// Real data in the context ends here
|
||||
|
||||
void Save(u32_le *ptr);
|
||||
void Restore(u32_le *ptr);
|
||||
|
|
Loading…
Add table
Reference in a new issue