Merge pull request #4531 from hrydgard/software-skinning

Software skinning
This commit is contained in:
Henrik Rydgård 2013-11-13 12:54:41 -08:00
commit a6ee2648d0
13 changed files with 956 additions and 224 deletions

View file

@ -1371,6 +1371,12 @@ void XEmitter::PSLLQ(X64Reg reg, int shift) {
Write8(shift);
}
// Emits a PSLLDQ instruction operating on `reg`, with `shift` as its immediate operand.
void XEmitter::PSLLDQ(X64Reg reg, int shift) {
	// The register slot of WriteSSEOp receives the constant 7 rather than a real
	// register - presumably the /7 opcode extension that selects PSLLDQ under 0x73.
	const X64Reg opcodeExtension = (X64Reg)7;
	WriteSSEOp(64, 0x73, true, opcodeExtension, R(reg));
	// The shift count is written last, as a single byte.
	Write8(shift);
}
// WARNING not REX compatible
void XEmitter::PSRAW(X64Reg reg, int shift) {
if (reg > 7)

View file

@ -658,6 +658,8 @@ public:
void PSLLD(X64Reg reg, int shift);
void PSLLQ(X64Reg reg, int shift);
void PSLLDQ(X64Reg reg, int shift);
void PSRAW(X64Reg reg, int shift);
void PSRAD(X64Reg reg, int shift);

View file

@ -144,6 +144,7 @@ void Config::Load(const char *iniFileName, const char *controllerIniFilename) {
graphics->Get("RenderingMode", &iRenderingMode, renderingModeDefault);
graphics->Get("SoftwareRendering", &bSoftwareRendering, false);
graphics->Get("HardwareTransform", &bHardwareTransform, true);
graphics->Get("SoftwareSkinning", &bSoftwareSkinning, true);
graphics->Get("TextureFiltering", &iTexFiltering, 1);
// Auto on Windows, 1x elsewhere. Maybe change to 2x on large screens?
#ifdef _WIN32
@ -401,6 +402,7 @@ void Config::Save() {
graphics->Set("RenderingMode", iRenderingMode);
graphics->Set("SoftwareRendering", bSoftwareRendering);
graphics->Set("HardwareTransform", bHardwareTransform);
graphics->Set("SoftwareSkinning", bSoftwareSkinning);
graphics->Set("TextureFiltering", iTexFiltering);
graphics->Set("InternalResolution", iInternalResolution);
graphics->Set("FrameSkip", iFrameSkip);

View file

@ -64,6 +64,8 @@ public:
// GFX
bool bSoftwareRendering;
bool bHardwareTransform; // only used in the GLES backend
bool bSoftwareSkinning; // may speed up some games
int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering 2 = Read Framebuffer to memory (CPU) 3 = Read Framebuffer to memory (GPU)
int iTexFiltering; // 1 = off , 2 = nearest , 3 = linear , 4 = linear(CG)
#ifdef BLACKBERRY

View file

@ -430,6 +430,10 @@ GLES_GPU::GLES_GPU()
commandFlags_[GE_CMD_TEXOFFSETV] &= ~FLAG_FLUSHBEFOREONCHANGE;
}
if (g_Config.bSoftwareSkinning) {
commandFlags_[GE_CMD_VERTEXTYPE] &= ~FLAG_FLUSHBEFOREONCHANGE;
}
BuildReportingInfo();
}
@ -868,8 +872,21 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_VERTEXTYPE:
if (diff)
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
if (diff) {
if (!g_Config.bSoftwareSkinning) {
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
} else {
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
// Restore and flush
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
}
}
break;
case GE_CMD_REGION1:
@ -1170,9 +1187,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3:
case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3:
if (diff) {
float r = (float)(data & 0xff)/255.0f;
float g = (float)((data>>8) & 0xff)/255.0f;
float b = (float)(data>>16)/255.0f;
float r = (float)(data & 0xff) * (1.0f / 255.0f);
float g = (float)((data >> 8) & 0xff) * (1.0f / 255.0f);
float b = (float)(data >> 16) * (1.0f / 255.0f);
int l = (cmd - GE_CMD_LAC0) / 3;
int t = (cmd - GE_CMD_LAC0) % 3;
@ -1301,10 +1318,10 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_WORLDMATRIXDATA:
{
int num = gstate.worldmtxnum & 0xF;
float newVal = getFloat24(data);
if (num < 12 && newVal != gstate.worldMatrix[num]) {
u32 newVal = data << 8;
if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
Flush();
gstate.worldMatrix[num] = newVal;
((u32 *)gstate.worldMatrix)[num] = newVal;
shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
}
num++;
@ -1319,10 +1336,10 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_VIEWMATRIXDATA:
{
int num = gstate.viewmtxnum & 0xF;
float newVal = getFloat24(data);
if (num < 12 && newVal != gstate.viewMatrix[num]) {
u32 newVal = data << 8;
if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
Flush();
gstate.viewMatrix[num] = newVal;
((u32 *)gstate.viewMatrix)[num] = newVal;
shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
}
num++;
@ -1337,10 +1354,10 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_PROJMATRIXDATA:
{
int num = gstate.projmtxnum & 0xF;
float newVal = getFloat24(data);
if (newVal != gstate.projMatrix[num]) {
u32 newVal = data << 8;
if (newVal != ((const u32 *)gstate.projMatrix)[num]) {
Flush();
gstate.projMatrix[num] = newVal;
((u32 *)gstate.projMatrix)[num] = newVal;
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
}
num++;
@ -1355,10 +1372,10 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_TGENMATRIXDATA:
{
int num = gstate.texmtxnum & 0xF;
float newVal = getFloat24(data);
if (num < 12 && newVal != gstate.tgenMatrix[num]) {
u32 newVal = data << 8;
if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
Flush();
gstate.tgenMatrix[num] = newVal;
((u32 *)gstate.tgenMatrix)[num] = newVal;
shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
}
num++;
@ -1373,11 +1390,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_BONEMATRIXDATA:
{
int num = gstate.boneMatrixNumber & 0x7F;
float newVal = getFloat24(data);
if (num < 96 && newVal != gstate.boneMatrix[num]) {
Flush();
gstate.boneMatrix[num] = newVal;
shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
u32 newVal = data << 8;
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
// Bone matrices should NOT flush when software skinning is enabled!
// TODO: Also check for morph...
if (!g_Config.bSoftwareSkinning) {
Flush();
shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
}
((u32 *)gstate.boneMatrix)[num] = newVal;
}
num++;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);

View file

@ -119,7 +119,8 @@ TransformDrawEngine::TransformDrawEngine()
framebufferManager_(0),
numDrawCalls(0),
vertexCountInDrawCalls(0),
uvScale(0) {
uvScale(0),
decodeCounter_(0) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
@ -324,78 +325,25 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, GEPrimitiveType pr
if (uvScale) {
uvScale[numDrawCalls] = gstate_c.uv;
}
numDrawCalls++;
vertexCountInDrawCalls += vertexCount;
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
DecodeVertsStep();
decodeCounter_++;
}
}
void TransformDrawEngine::DecodeVerts() {
UVScale origUV;
if (uvScale)
origUV = gstate_c.uv;
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls[i];
indexGen.SetIndex(collectedVerts);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
u32 indexType = dc.indexType;
void *inds = dc.inds;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
if (uvScale)
gstate_c.uv = uvScale[i];
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
collectedVerts += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int j = i + 1;
int lastMatch = i;
while (j < numDrawCalls) {
if (drawCalls[j].verts != dc.verts)
break;
if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
j++;
}
// 2. Loop through the drawcalls, translating indices as we go.
for (j = i; j <= lastMatch; j++) {
switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16 *)drawCalls[j].inds, indexLowerBound);
break;
}
}
int vertexCount = indexUpperBound - indexLowerBound + 1;
// 3. Decode that range of vertex data.
if (uvScale)
gstate_c.uv = uvScale[i];
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
collectedVerts += vertexCount;
// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
i = lastMatch;
}
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
if (uvScale)
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep();
}
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
@ -406,6 +354,68 @@ void TransformDrawEngine::DecodeVerts() {
gstate_c.uv = origUV;
}
// Decodes the deferred draw call at index decodeCounter_ into the `decoded` buffer
// and feeds its primitive(s) to the index generator.
//
// A non-indexed call is decoded on its own. An indexed call is merged with any
// directly following calls that share its vertex pointer (and its UV scale, when
// UV scales are being tracked), so the combined vertex range is decoded only once.
// In that case decodeCounter_ is left pointing at the LAST merged call; the caller
// must increment decodeCounter_ after this returns to reach the next undecoded call.
void TransformDrawEngine::DecodeVertsStep() {
	const int i = decodeCounter_;
	const DeferredDrawCall &dc = drawCalls[i];

	indexGen.SetIndex(collectedVerts);
	int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
	u32 indexType = dc.indexType;

	if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
		// Decode the verts and apply morphing. Simple.
		dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
			dc.verts, indexLowerBound, indexUpperBound);
		collectedVerts += indexUpperBound - indexLowerBound + 1;
		indexGen.AddPrim(dc.prim, dc.vertexCount);
	} else {
		// It's fairly common that games issue long sequences of PRIM calls, with differing
		// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
		// these as much as possible, so we make sure here to combine as many as possible
		// into one nice big drawcall, sharing data.

		// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
		//    Expand the lower and upper bounds as we go.
		int j = i + 1;
		int lastMatch = i;
		while (j < numDrawCalls) {
			if (drawCalls[j].verts != dc.verts)
				break;
			if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
				break;

			indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
			indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
			lastMatch = j;
			j++;
		}

		// 2. Loop through the drawcalls, translating indices as we go.
		//    Indices are translated relative to the combined lower bound.
		for (j = i; j <= lastMatch; j++) {
			switch (indexType) {
			case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
				indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
				break;
			case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
				indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16 *)drawCalls[j].inds, indexLowerBound);
				break;
			}
		}

		int vertexCount = indexUpperBound - indexLowerBound + 1;
		// 3. Decode that range of vertex data.
		dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
			dc.verts, indexLowerBound, indexUpperBound);
		collectedVerts += vertexCount;

		// 4. Advance indexgen vertex counter.
		indexGen.Advance(vertexCount);

		// Skip over the calls that were merged into this one; the caller's increment
		// then lands on the first call that has not been decoded yet.
		decodeCounter_ = lastMatch;
	}
}
u32 TransformDrawEngine::ComputeHash() {
u32 fullhash = 0;
int vertexSize = dec_->GetDecVtxFmt().stride;
@ -524,8 +534,14 @@ void TransformDrawEngine::DoFlush() {
int vertexCount = 0;
int maxIndex = 0;
bool useElements = true;
// Cannot cache vertex data with morph enabled.
if (g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK)) {
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
useCache = false;
if (useCache) {
u32 id = ComputeFastDCID();
auto iter = vai_.find(id);
VertexArrayInfo *vai;
@ -714,6 +730,7 @@ rotateVBO:
collectedVerts = 0;
numDrawCalls = 0;
vertexCountInDrawCalls = 0;
decodeCounter_ = 0;
prevPrim_ = GE_PRIM_INVALID;
#ifndef USING_GLES2

View file

@ -97,7 +97,6 @@ public:
void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType);
bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType);
void DecodeVerts();
void SetShaderManager(ShaderManager *shaderManager) {
shaderManager_ = shaderManager;
}
@ -127,6 +126,8 @@ public:
}
private:
void DecodeVerts();
void DecodeVertsStep();
void DoFlush();
void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
void ApplyDrawState(int prim);
@ -195,6 +196,7 @@ private:
int vertexCountInDrawCalls;
int decimationCounter_;
int decodeCounter_;
UVScale *uvScale;
};

File diff suppressed because it is too large Load diff

View file

@ -64,6 +64,10 @@ public:
void Step_WeightsU16() const;
void Step_WeightsFloat() const;
void Step_WeightsU8Skin() const;
void Step_WeightsU16Skin() const;
void Step_WeightsFloatSkin() const;
void Step_TcU8() const;
void Step_TcU16() const;
void Step_TcFloat() const;
@ -91,6 +95,10 @@ public:
void Step_NormalS16() const;
void Step_NormalFloat() const;
void Step_NormalS8Skin() const;
void Step_NormalS16Skin() const;
void Step_NormalFloatSkin() const;
void Step_NormalS8Morph() const;
void Step_NormalS16Morph() const;
void Step_NormalFloatMorph() const;
@ -99,6 +107,10 @@ public:
void Step_PosS16() const;
void Step_PosFloat() const;
void Step_PosS8Skin() const;
void Step_PosS16Skin() const;
void Step_PosFloatSkin() const;
void Step_PosS8Morph() const;
void Step_PosS16Morph() const;
void Step_PosFloatMorph() const;
@ -187,6 +199,10 @@ public:
void Jit_WeightsU16();
void Jit_WeightsFloat();
void Jit_WeightsU8Skin();
void Jit_WeightsU16Skin();
void Jit_WeightsFloatSkin();
void Jit_TcU8();
void Jit_TcU16();
void Jit_TcFloat();
@ -210,13 +226,22 @@ public:
void Jit_NormalS16();
void Jit_NormalFloat();
void Jit_NormalS8Skin();
void Jit_NormalS16Skin();
void Jit_NormalFloatSkin();
void Jit_PosS8();
void Jit_PosS8Through();
void Jit_PosS16();
void Jit_PosS16Through();
void Jit_PosFloat();
void Jit_PosS8Through();
void Jit_PosS16Through();
void Jit_PosS8Skin();
void Jit_PosS16Skin();
void Jit_PosFloatSkin();
private:
bool CompileStep(const VertexDecoder &dec, int i);
void Jit_WriteMatrixMul(int outOff, bool pos);
const VertexDecoder *dec_;
};

View file

@ -28,10 +28,14 @@
#include "GPU/Directx9/GPU_DX9.h"
#endif
#include "Core/CoreParameter.h"
#include "Core/Config.h"
#include "Core/System.h"
GPUgstate gstate;
GPUStateCache gstate_c;
// This must be aligned so that the matrices within are aligned.
GPUgstate MEMORY_ALIGNED16(gstate);
// Let's align this one too for good measure.
GPUStateCache MEMORY_ALIGNED16(gstate_c);
GPUInterface *gpu;
GPUDebugInterface *gpuDebug;
GPUStatistics gpuStats;
@ -200,3 +204,10 @@ void GPUgstate::Restore(u32_le *ptr) {
memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
}
// Reports whether skinning counts as enabled for the given vertex type.
// When the SoftwareSkinning config option is on and the vertex type has no morph
// count bits set, this always answers false; otherwise the answer is whether the
// weight field of vertType is anything other than GE_VTYPE_WEIGHT_NONE.
bool vertTypeIsSkinningEnabled(u32 vertType) {
	if (g_Config.bSoftwareSkinning) {
		const bool morphing = (vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;
		if (!morphing)
			return false;
	}
	const bool hasWeights = (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE;
	return hasWeights;
}

View file

@ -18,6 +18,7 @@
#pragma once
#include <cmath>
#include "../Globals.h"
#include "ge_constants.h"
#include "Common/Common.h"
@ -406,7 +407,8 @@ enum SkipDrawReasonFlags {
SKIPDRAW_BAD_FB_TEXTURE = 4,
};
inline bool vertTypeIsSkinningEnabled(u32 vertType) { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); }
bool vertTypeIsSkinningEnabled(u32 vertType);
// Number of bone weights described by vertType: the weight-count field plus one.
inline int vertTypeGetNumBoneWeights(u32 vertType) {
	return ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT) + 1;
}
// Returns the weight bits of vertType, masked but left in place (not shifted down).
inline int vertTypeGetWeightMask(u32 vertType) {
	return GE_VTYPE_WEIGHT_MASK & vertType;
}
// Returns the texture-coordinate bits of vertType, masked but left in place (not shifted down).
inline int vertTypeGetTexCoordMask(u32 vertType) {
	return GE_VTYPE_TC_MASK & vertType;
}

View file

@ -143,10 +143,13 @@ void GameSettingsScreen::CreateViews() {
graphicsSettings->Add(new CheckBox(&g_Config.bVSync, gs->T("VSync")));
#endif
graphicsSettings->Add(new CheckBox(&g_Config.bHardwareTransform, gs->T("Hardware Transform")));
CheckBox *swSkin = graphicsSettings->Add(new CheckBox(&g_Config.bSoftwareSkinning, gs->T("Software Skinning")));
graphicsSettings->Add(new CheckBox(&g_Config.bVertexCache, gs->T("Vertex Cache")));
CheckBox *vtxJit = graphicsSettings->Add(new CheckBox(&g_Config.bVertexDecoderJit, gs->T("Vertex Decoder JIT")));
if (PSP_IsInited())
if (PSP_IsInited()) {
swSkin->SetEnabled(false);
vtxJit->SetEnabled(false);
}
graphicsSettings->Add(new CheckBox(&g_Config.bLowQualitySplineBezier, gs->T("LowCurves", "Low quality spline/bezier curves")));

2
native

@ -1 +1 @@
Subproject commit 483e42f64e666ee47390b47e3405ad731d011f93
Subproject commit cf895f95d7ae75d4535cf252687fd4f9c4f1663b