Unify DecodeVertsStep

This commit is contained in:
Henrik Rydgård 2017-06-02 12:09:57 +02:00
parent 240e058b3b
commit 3032240916
10 changed files with 11 additions and 304 deletions

View file

@ -17,6 +17,7 @@
#include <algorithm>
#include <cstring>
#include "profiler/profiler.h"
#include "Common/ColorConv.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"

View file

@ -67,6 +67,9 @@ protected:
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);
// Vertex decoding
// Performs one decode step for the deferred draw call at index i, writing into dest.
// Both i and decodedVerts are taken by non-const reference, so the callee can update them;
// the call sites pass decodeCounter_ and decodedVerts_ and increment the counter afterwards.
// NOTE(review): the backend implementation with this same signature advances i past any
// draw calls it merged and adds the decoded vertex count to decodedVerts - confirm that the
// shared definition keeps that contract.
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts);
bool ApplyShaderBlending();
VertexDecoder *GetVertexDecoder(u32 vtype);

View file

@ -328,7 +328,7 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
vertexCountInDrawCalls_ += vertexCount;
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
DecodeVertsStep();
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
decodeCounter_++;
}
@ -345,7 +345,7 @@ void DrawEngineD3D11::DecodeVerts() {
const UVScale origUV = gstate_c.uv;
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep();
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
}
gstate_c.uv = origUV;
@ -357,80 +357,6 @@ void DrawEngineD3D11::DecodeVerts() {
}
}
void DrawEngineD3D11::DecodeVertsStep() {
const int i = decodeCounter_;
const DeferredDrawCall &dc = drawCalls[i];
indexGen.SetIndex(decodedVerts_);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
u32 indexType = dc.indexType;
void *inds = dc.inds;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls;
for (int j = i + 1; j < total; ++j) {
if (drawCalls[j].verts != dc.verts)
break;
if (memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
}
// 2. Loop through the drawcalls, translating indices as we go.
switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
}
break;
}
const int vertexCount = indexUpperBound - indexLowerBound + 1;
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if (decodedVerts_ + vertexCount > VERTEX_BUFFER_MAX) {
return;
}
// 3. Decode that range of vertex data.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += vertexCount;
// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
decodeCounter_ = lastMatch;
}
}
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
// Switch to u32 units.
const u32 *p = (const u32 *)ptr;

View file

@ -158,7 +158,6 @@ public:
private:
void DecodeVerts();
void DecodeVertsStep();
void DoFlush();
void ApplyDrawState(int prim);

View file

@ -308,7 +308,7 @@ void DrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, in
vertexCountInDrawCalls_ += vertexCount;
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
DecodeVertsStep();
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
decodeCounter_++;
}
@ -325,7 +325,7 @@ void DrawEngineDX9::DecodeVerts() {
const UVScale origUV = gstate_c.uv;
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep();
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
}
gstate_c.uv = origUV;
@ -337,80 +337,6 @@ void DrawEngineDX9::DecodeVerts() {
}
}
void DrawEngineDX9::DecodeVertsStep() {
const int i = decodeCounter_;
const DeferredDrawCall &dc = drawCalls[i];
indexGen.SetIndex(decodedVerts_);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
u32 indexType = dc.indexType;
void *inds = dc.inds;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls;
for (int j = i + 1; j < total; ++j) {
if (drawCalls[j].verts != dc.verts)
break;
if (memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
}
// 2. Loop through the drawcalls, translating indices as we go.
switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
}
break;
}
const int vertexCount = indexUpperBound - indexLowerBound + 1;
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if (decodedVerts_ + vertexCount > VERTEX_BUFFER_MAX) {
return;
}
// 3. Decode that range of vertex data.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += vertexCount;
// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
decodeCounter_ = lastMatch;
}
}
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
// Switch to u32 units.
const u32 *p = (const u32 *)ptr;

View file

@ -154,7 +154,6 @@ public:
private:
void DecodeVerts();
void DecodeVertsStep();
void DoFlush();
void ApplyDrawState(int prim);

View file

@ -323,7 +323,7 @@ void DrawEngineGLES::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, i
vertexCountInDrawCalls_ += vertexCount;
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
DecodeVertsStep();
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
decodeCounter_++;
}
@ -340,7 +340,7 @@ void DrawEngineGLES::DecodeVerts() {
const UVScale origUV = gstate_c.uv;
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep();
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
}
gstate_c.uv = origUV;
// Sanity check
@ -351,81 +351,6 @@ void DrawEngineGLES::DecodeVerts() {
}
}
void DrawEngineGLES::DecodeVertsStep() {
PROFILE_THIS_SCOPE("vertdec");
const int i = decodeCounter_;
const DeferredDrawCall &dc = drawCalls[i];
indexGen.SetIndex(decodedVerts_);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
u32 indexType = dc.indexType;
if (indexType == (GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT)) {
// Decode the verts and apply morphing. Simple.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls;
for (int j = i + 1; j < total; ++j) {
if (drawCalls[j].verts != dc.verts)
break;
if (memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
}
// 2. Loop through the drawcalls, translating indices as we go.
switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
}
break;
}
const int vertexCount = indexUpperBound - indexLowerBound + 1;
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if (decodedVerts_ + vertexCount > VERTEX_BUFFER_MAX) {
return;
}
// 3. Decode that range of vertex data.
int stride = (int)dec_->GetDecVtxFmt().stride;
dec_->DecodeVerts(decoded + decodedVerts_ * stride, dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += vertexCount;
// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
decodeCounter_ = lastMatch;
}
}
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
// Switch to u32 units.
const u32 *p = (const u32 *)ptr;

View file

@ -165,7 +165,6 @@ public:
private:
void DecodeVerts();
void DecodeVertsStep();
void DoFlush();
void ApplyDrawState(int prim);
void ApplyDrawStateLate();

View file

@ -394,76 +394,6 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
}
}
// Decodes the deferred draw call at index i into dest and feeds the corresponding
// primitives to indexGen.
//
// dest         - base of the output vertex buffer; the write position is derived from
//                decodedVerts and the decoded vertex stride.
// i            - in/out: index of the draw call to decode. For indexed draws it is set to
//                the last draw call merged into this step (the caller's loop then steps past it).
// decodedVerts - in/out: number of vertices decoded so far; increased by the amount decoded here.
void DrawEngineVulkan::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
	const DeferredDrawCall &dc = drawCalls[i];

	indexGen.SetIndex(decodedVerts);
	int indexLowerBound = dc.indexLowerBound;
	int indexUpperBound = dc.indexUpperBound;

	if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
		// Decode the verts and apply morphing. Simple.
		dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
			dc.verts, indexLowerBound, indexUpperBound);
		decodedVerts += indexUpperBound - indexLowerBound + 1;
		indexGen.AddPrim(dc.prim, dc.vertexCount);
	} else {
		// It's fairly common that games issue long sequences of PRIM calls, with differing
		// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
		// these as much as possible, so we make sure here to combine as many as possible
		// into one nice big drawcall, sharing data.

		// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
		//    Expand the lower and upper bounds as we go.
		int lastMatch = i;
		const int total = numDrawCalls;
		for (int j = i + 1; j < total; ++j) {
			if (drawCalls[j].verts != dc.verts)
				break;
			// The caller applies uvScale per draw call before decoding, so draw calls with a
			// different UV scale must not be folded into this decode pass. This matches the
			// check done by the other backends' DecodeVertsStep.
			if (memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
				break;

			indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
			indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
			lastMatch = j;
		}

		// 2. Loop through the drawcalls, translating indices as we go.
		//    Indices are rebased against the merged lower bound.
		switch (dc.indexType) {
		case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
			for (int j = i; j <= lastMatch; j++) {
				indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
			}
			break;
		case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
			for (int j = i; j <= lastMatch; j++) {
				indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
			}
			break;
		case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
			for (int j = i; j <= lastMatch; j++) {
				indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
			}
			break;
		}

		const int vertexCount = indexUpperBound - indexLowerBound + 1;

		// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
		// NOTE(review): this bails out after the indices were already translated and without
		// moving i to lastMatch, so the caller will revisit the merged draw calls.
		if (decodedVerts + vertexCount > VERTEX_BUFFER_MAX) {
			return;
		}

		// 3. Decode that range of vertex data.
		dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
			dc.verts, indexLowerBound, indexUpperBound);
		decodedVerts += vertexCount;

		// 4. Advance indexgen vertex counter.
		indexGen.Advance(vertexCount);
		// Report back how far we got so the caller skips the merged draw calls.
		i = lastMatch;
	}
}
void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
u8 *dest = decoded;
@ -501,7 +431,7 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset,
const UVScale origUV = gstate_c.uv;
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep(dest, decodeCounter_, decodedVerts_); // NOTE! DecodeVertsStep can modify i!
DecodeVertsStep(dest, decodeCounter_, decodedVerts_); // NOTE! DecodeVertsStep can modify decodeCounter_!
}
gstate_c.uv = origUV;

View file

@ -134,7 +134,6 @@ private:
void DestroyDeviceObjects();
void DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf);
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts);
void DoFlush();
void UpdateUBOs(FrameData *frame);