It's running.

This commit is contained in:
Henrik Rydgård 2023-10-02 00:50:20 +02:00
parent 6a2e5dd7f7
commit 9b411af1f5
11 changed files with 209 additions and 231 deletions

View file

@ -72,43 +72,20 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
return dec;
}
int DrawEngineCommon::ComputeNumVertsToDecode() const {
int vertsToDecode = 0;
int numDrawCalls = numDrawCalls_;
if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls_[i];
vertsToDecode += dc.vertexCount;
}
} else {
// TODO: Share this computation with DecodeVertsStep?
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls_[i];
int lastMatch = i;
const int total = numDrawCalls;
int indexLowerBound = dc.indexLowerBound;
int indexUpperBound = dc.indexUpperBound;
for (int j = i + 1; j < total; ++j) {
if (drawCalls_[j].verts != dc.verts)
break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls_[j].indexUpperBound);
lastMatch = j;
}
vertsToDecode += indexUpperBound - indexLowerBound + 1;
i = lastMatch;
}
void DrawEngineCommon::DecodeVerts(u8 *dest) {
int decodeVertsCounter = decodeVertsCounter_;
for (; decodeVertsCounter < numDrawVerts_; decodeVertsCounter++) {
DecodeVertsStep(dest, decodeVertsCounter, decodedVerts_, &drawVerts_[decodeVertsCounter].uvScale);
}
return vertsToDecode;
decodeVertsCounter_ = decodeVertsCounter;
}
void DrawEngineCommon::DecodeVerts(u8 *dest) {
int decodeCounter = decodeCounter_;
for (; decodeCounter < numDrawCalls_; decodeCounter++) {
DecodeVertsStep(dest, decodeCounter, decodedVerts_, &drawCalls_[decodeCounter].uvScale); // NOTE! DecodeVertsStep can modify the decodeCounter parameter!
void DrawEngineCommon::DecodeInds() {
int decodeIndsCounter = decodeIndsCounter_;
for (; decodeIndsCounter < numDrawInds_; decodeIndsCounter++) {
DecodeIndsStep(decodeIndsCounter);
}
decodeCounter_ = decodeCounter;
decodeIndsCounter_ = decodeIndsCounter;
// Sanity check
if (indexGen.Prim() < 0) {
@ -619,92 +596,43 @@ void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
gstate_c.Dirty(DIRTY_SHADERBLEND);
}
void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts, const UVScale *uvScale) {
void DrawEngineCommon::DecodeVertsStep(u8 *dest, int i, int &decodedVerts, const UVScale *uvScale) {
PROFILE_THIS_SCOPE("vertdec");
const DeferredDrawCall &dc = drawCalls_[i];
const DeferredVerts &dv = drawVerts_[i];
indexGen.SetIndex(decodedVerts);
int indexLowerBound = dc.indexLowerBound;
int indexUpperBound = dc.indexUpperBound;
int indexLowerBound = dv.indexLowerBound;
int indexUpperBound = dv.indexUpperBound;
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts (and at the same time apply morphing/skinning). Simple.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, uvScale, indexLowerBound, indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != dc.cullMode) {
clockwise = false;
}
indexGen.AddPrim(dc.prim, dc.vertexCount, clockwise);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// Decode the verts (and at the same time apply morphing/skinning). Simple.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dv.verts, uvScale, dv.indexLowerBound, dv.indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
}
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls_;
for (int j = i + 1; j < total; ++j) {
if (drawCalls_[j].verts != dc.verts)
break;
// TODO: What if UV scale/offset changes between drawcalls here?
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls_[j].indexUpperBound);
lastMatch = j;
}
// 2. Loop through the drawcalls, translating indices as we go.
switch (dc.indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != drawCalls_[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls_[j].prim, drawCalls_[j].vertexCount, (const u8 *)drawCalls_[j].inds, indexLowerBound, clockwise);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != drawCalls_[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls_[j].prim, drawCalls_[j].vertexCount, (const u16_le *)drawCalls_[j].inds, indexLowerBound, clockwise);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != drawCalls_[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls_[j].prim, drawCalls_[j].vertexCount, (const u32_le *)drawCalls_[j].inds, indexLowerBound, clockwise);
}
break;
}
const int vertexCount = indexUpperBound - indexLowerBound + 1;
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if (decodedVerts + vertexCount > VERTEX_BUFFER_MAX) {
return;
}
// 3. Decode that range of vertex data.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, uvScale, indexLowerBound, indexUpperBound);
decodedVerts += vertexCount;
// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
i = lastMatch;
void DrawEngineCommon::DecodeIndsStep(int i) {
const DeferredInds &di = drawInds_[i];
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != di.cullMode) {
clockwise = false;
}
// We've already collapsed subsequent draws with the same vertex pointer, so no tricky logic here anymore.
// 2. Loop through the drawcalls, translating indices as we go.
switch (di.indexType) {
case GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT:
indexGen.AddPrim(di.prim, di.vertexCount, clockwise);
break;
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u8 *)di.inds, di.indexOffset, clockwise);
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u16_le *)di.inds, di.indexOffset, clockwise);
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u32_le *)di.inds, di.indexOffset, clockwise);
break;
}
// 4. Advance indexgen vertex counter.
indexGen.Advance(di.vertexCount);
}
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
@ -731,27 +659,59 @@ u32 DrawEngineCommon::ComputeMiniHash() {
const int indexSize = IndexSize(dec_->VertexType());
int step;
if (numDrawCalls_ < 3) {
if (numDrawVerts_ < 3) {
step = 1;
} else if (numDrawCalls_ < 8) {
} else if (numDrawVerts_ < 8) {
step = 4;
} else {
step = numDrawCalls_ / 8;
step = numDrawVerts_ / 8;
}
for (int i = 0; i < numDrawCalls_; i += step) {
const DeferredDrawCall &dc = drawCalls_[i];
if (!dc.inds) {
fullhash += ComputeMiniHashRange(dc.verts, vertexSize * dc.vertexCount);
} else {
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
fullhash += ComputeMiniHashRange(dc.inds, indexSize * dc.vertexCount);
}
for (int i = 0; i < numDrawVerts_; i += step) {
const DeferredVerts &dc = drawVerts_[i];
fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * dc.indexLowerBound, vertexSize * (dc.indexUpperBound - dc.indexLowerBound));
}
for (int i = 0; i < numDrawInds_; i += step) {
const DeferredInds &di = drawInds_[i];
fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
}
return fullhash;
}
// Cheap 32-bit bit scrambler from https://nullprogram.com/blog/2018/07/31/
// Two rounds of (xorshift, multiply by an odd constant) followed by a final xorshift.
// An input of 0 yields 0.
inline uint32_t lowbias32_r(uint32_t x) {
	x = (x ^ (x >> 16)) * 0x43021123U;
	x = (x ^ (x >> 15) ^ (x >> 30)) * 0x1d69e2a5U;
	return x ^ (x >> 16);
}
// Computes a 32-bit ID describing the currently queued draws. It is built from the
// recorded pointers and draw parameters, not from the vertex/index data itself.
//
// Vertex entries (drawVerts_) and index entries (drawInds_) are separate arrays with
// independent counts: SubmitPrim adds one DeferredInds per call, but may fold a draw
// into the previous DeferredVerts instead of adding a new one. Each loop therefore
// only reads fields from the array it iterates, so that every queued draw's prim and
// vertexCount contribute to the ID and no entry is paired with an unrelated index.
uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
	uint32_t dcid = 0;
	// Vertex entries: vertex pointer and the vertex count recorded on the entry.
	for (int i = 0; i < numDrawVerts_; i++) {
		u32 dhash = dcid;
		dhash = __rotl(dhash ^ (u32)(uintptr_t)drawVerts_[i].verts, 13);
		dcid = lowbias32_r(__rotl(dhash ^ (u32)drawVerts_[i].vertexCount, 11));
	}
	// Index entries: index pointer, index type, vertex count and primitive type of every draw.
	for (int j = 0; j < numDrawInds_; j++) {
		u32 dhash = dcid;
		dhash = __rotl(dhash ^ (u32)(uintptr_t)drawInds_[j].inds, 19);
		dhash = __rotl(dhash ^ (u32)drawInds_[j].indexType, 7);
		dhash = __rotl(dhash ^ (u32)drawInds_[j].vertexCount, 11);
		dcid = lowbias32_r(dhash ^ (u32)drawInds_[j].prim);
	}
	return dcid;
}
// Returns the total number of vertices spanned by the queued vertex entries, summing
// each entry's inclusive [indexLowerBound, indexUpperBound] range.
int DrawEngineCommon::ComputeNumVertsToDecode() const {
	int total = 0;
	for (int n = 0; n < numDrawVerts_; ++n) {
		const auto &dv = drawVerts_[n];
		// Both bounds are inclusive, hence the +1.
		total += 1 + dv.indexUpperBound - dv.indexLowerBound;
	}
	return total;
}
uint64_t DrawEngineCommon::ComputeHash() {
uint64_t fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;
@ -759,39 +719,26 @@ uint64_t DrawEngineCommon::ComputeHash() {
// TODO: Add some caps both for numDrawCalls_ and num verts to check?
// It is really very expensive to check all the vertex data so often.
for (int i = 0; i < numDrawCalls_; i++) {
const DeferredDrawCall &dc = drawCalls_[i];
if (!dc.inds) {
fullhash += XXH3_64bits((const char *)dc.verts, vertexSize * dc.vertexCount);
} else {
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
int j = i + 1;
int lastMatch = i;
while (j < numDrawCalls_) {
if (drawCalls_[j].verts != dc.verts)
break;
indexLowerBound = std::min(indexLowerBound, (int)dc.indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)dc.indexUpperBound);
lastMatch = j;
j++;
}
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
// we do when drawing.
fullhash += XXH3_64bits((const char *)dc.verts + vertexSize * indexLowerBound,
vertexSize * (indexUpperBound - indexLowerBound));
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += XXH3_64bits((const char *)dc.inds, indexSize * dc.vertexCount);
i = lastMatch;
}
for (int i = 0; i < numDrawVerts_; i++) {
const DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound, indexUpperBound = dv.indexUpperBound;
fullhash += XXH3_64bits((const char *)dv.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
}
fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
for (int i = 0; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
}
// this looks utterly broken??
// fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
return fullhash;
}
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush();
}
@ -818,27 +765,48 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
if (vertexCount < 3 && ((vertexCount < 2 && prim > 0) || (prim > GE_PRIM_LINE_STRIP && prim != GE_PRIM_RECTANGLES)))
return;
DeferredDrawCall &dc = drawCalls_[numDrawCalls_];
dc.verts = verts;
dc.inds = inds;
dc.vertexCount = vertexCount;
dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
dc.prim = prim;
dc.cullMode = cullMode;
dc.uvScale = gstate_c.uv;
if (inds) {
GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound);
bool applySkin = (vertTypeID & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
DeferredInds &di = drawInds_[numDrawInds_++];
di.inds = inds;
di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
di.prim = prim;
di.cullMode = cullMode;
di.indexOffset = 0;
di.vertexCount = vertexCount;
if (inds && numDrawVerts_ > decodeVertsCounter_ && drawVerts_[numDrawVerts_ - 1].verts == verts && !applySkin) {
// Same vertex pointer as a previous un-decoded draw call - let's just extend the decode!
DeferredVerts &dv = drawVerts_[numDrawVerts_ - 1];
u16 lb;
u16 ub;
GetIndexBounds(inds, vertexCount, vertTypeID, &lb, &ub);
if (lb < dv.indexLowerBound)
dv.indexLowerBound = lb;
if (ub > dv.indexUpperBound)
dv.indexUpperBound = ub;
di.indexOffset = indexOffset_;
// indexOffset_ += vertexCount;
} else {
dc.indexLowerBound = 0;
dc.indexUpperBound = vertexCount - 1;
// Record a new draw, and a new index gen.
DeferredVerts &dv = drawVerts_[numDrawVerts_++];
dv.verts = verts;
dv.vertexCount = vertexCount;
dv.uvScale = gstate_c.uv;
if (inds) {
GetIndexBounds(inds, vertexCount, vertTypeID, &dv.indexLowerBound, &dv.indexUpperBound);
} else {
dv.indexLowerBound = 0;
dv.indexUpperBound = vertexCount - 1;
}
indexOffset_ = 0; // vertexCount;
}
numDrawCalls_++;
vertexCountInDrawCalls_ += vertexCount;
if ((vertTypeID & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode) {
DecodeVertsStep(decoded_, decodeCounter_, decodedVerts_, &dc.uvScale);
decodeCounter_++;
if (applySkin) {
DecodeVertsStep(decoded_, decodeVertsCounter_, decodedVerts_, &drawVerts_[numDrawVerts_ - 1].uvScale);
DecodeIndsStep(decodeIndsCounter_);
}
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
@ -861,29 +829,6 @@ bool DrawEngineCommon::CanUseHardwareTessellation(GEPatchPrimType prim) {
return false;
}
// Cheap 32-bit bit scrambler from https://nullprogram.com/blog/2018/07/31/
// Two rounds of (xorshift, multiply by an odd constant) followed by a final xorshift.
// An input of 0 yields 0.
inline uint32_t lowbias32_r(uint32_t x) {
	x = (x ^ (x >> 16)) * 0x43021123U;
	x = (x ^ (x >> 15) ^ (x >> 30)) * 0x1d69e2a5U;
	return x ^ (x >> 16);
}
uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
uint32_t dcid = 0;
for (int i = 0; i < numDrawCalls_; i++) {
u32 dhash = dcid;
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].verts, 13);
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].inds, 19);
dhash = __rotl(dhash ^ (u32)drawCalls_[i].indexType, 7);
dhash = __rotl(dhash ^ (u32)drawCalls_[i].vertexCount, 11);
dcid = lowbias32_r(dhash ^ (u32)drawCalls_[i].prim);
}
return dcid;
}
void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType) {
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;

View file

@ -130,7 +130,7 @@ public:
return false;
}
int GetNumDrawCalls() const {
return numDrawCalls_;
return numDrawVerts_;
}
VertexDecoder *GetVertexDecoder(u32 vtype);
@ -141,8 +141,8 @@ protected:
virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
void UpdatePlanes();
int ComputeNumVertsToDecode() const;
void DecodeVerts(u8 *dest);
void DecodeInds();
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
@ -152,7 +152,10 @@ protected:
uint64_t ComputeHash();
// Vertex decoding
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts, const UVScale *uvScale);
void DecodeVertsStep(u8 *dest, int i, int &decodedVerts, const UVScale *uvScale);
void DecodeIndsStep(int i);
int ComputeNumVertsToDecode() const;
void ApplyFramebufferRead(FBOTexState *fboTexState);
@ -210,25 +213,36 @@ protected:
TransformedVertex *transformedExpanded_ = nullptr;
// Defer all vertex decoding to a "Flush" (except when software skinning)
struct DeferredDrawCall {
struct DeferredVerts {
const void *verts;
const void *inds;
u32 vertexCount;
u8 indexType;
s8 prim;
u8 cullMode;
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
};
// One deferred index-generation step. SubmitPrim appends one of these per submitted
// draw, and DecodeIndsStep later turns it into indices via indexGen.
struct DeferredInds {
const void *inds;  // Index pointer as passed to SubmitPrim; cast to u8/u16_le/u32_le according to indexType.
u32 vertexCount;  // Count passed to AddPrim/TranslatePrim and to indexGen.Advance.
u8 indexType;  // (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT.
s8 prim;  // Primitive type of the draw (SubmitPrim's GEPrimitiveType argument).
u8 cullMode;  // Cull mode recorded at submit; compared against gstate.getCullMode() to pick winding.
u16 indexOffset;  // Offset argument handed to TranslatePrim. NOTE(review): assigned from the int indexOffset_, so it is narrowed to 16 bits.
};
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
DeferredDrawCall drawCalls_[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls_ = 0;
DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_CALLS];
DeferredInds drawInds_[MAX_DEFERRED_DRAW_CALLS];
int numDrawVerts_ = 0;
int numDrawInds_ = 0;
int vertexCountInDrawCalls_ = 0;
int decimationCounter_ = 0;
int decodeCounter_ = 0;
int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;
int indexOffset_ = 0;
// Vertex collector state
IndexGenerator indexGen;

View file

@ -366,7 +366,7 @@ void DrawEngineD3D11::DoFlush() {
if (useCache) {
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
u32 dcid = ComputeDrawcallsHash() ^ gstate.getUVGenMode();
VertexArrayInfoD3D11 *vai;
if (!vai_.Get(dcid, &vai)) {
@ -719,14 +719,16 @@ rotateVBO:
}
gpuStats.numFlushes++;
gpuStats.numDrawCalls += numDrawCalls_;
gpuStats.numDrawCalls += numDrawInds_;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
decodeVertsCounter_ = 0;
decodeIndsCounter_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

View file

@ -138,19 +138,19 @@ public:
// So that this can be inlined
void Flush() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DoFlush();
}
void FinishDeferred() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DecodeVerts(decoded_);
}
void DispatchFlush() override {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
Flush();
}

View file

@ -345,7 +345,7 @@ void DrawEngineDX9::DoFlush() {
if (useCache) {
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
u32 dcid = ComputeDrawcallsHash() ^ gstate.getUVGenMode();
VertexArrayInfoDX9 *vai;
if (!vai_.Get(dcid, &vai)) {
vai = new VertexArrayInfoDX9();
@ -658,14 +658,18 @@ rotateVBO:
}
gpuStats.numFlushes++;
gpuStats.numDrawCalls += numDrawCalls_;
gpuStats.numDrawCalls += numDrawInds_;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
// TODO: The below should be shared.
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
decodeVertsCounter_ = 0;
decodeIndsCounter_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

View file

@ -128,19 +128,19 @@ public:
// So that this can be inlined
void Flush() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DoFlush();
}
void FinishDeferred() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DecodeVerts(decoded_);
}
void DispatchFlush() override {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
Flush();
}

View file

@ -245,9 +245,11 @@ void DrawEngineGLES::DoFlush() {
// can't goto bail here, skips too many variable initializations. So let's wipe the most important stuff.
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
decodeVertsCounter_ = 0;
decodeIndsCounter_ = 0;
return;
}
@ -471,7 +473,7 @@ void DrawEngineGLES::DoFlush() {
bail:
gpuStats.numFlushes++;
gpuStats.numDrawCalls += numDrawCalls_;
gpuStats.numDrawCalls += numDrawInds_;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
// TODO: When the next flush has the same vertex format, we can continue with the same offset in the vertex buffer,
@ -479,9 +481,11 @@ bail:
// wanted to avoid rebinding the vertex input every time).
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
decodeVertsCounter_ = 0;
decodeIndsCounter_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

View file

@ -86,19 +86,19 @@ public:
// So that this can be inlined
void Flush() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DoFlush();
}
void FinishDeferred() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DoFlush();
}
void DispatchFlush() override {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
Flush();
}

View file

@ -748,6 +748,7 @@ void DrawEngineVulkan::DoFlush() {
// Decode directly into the pushbuffer
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
}
DecodeInds();
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
}
@ -845,6 +846,7 @@ void DrawEngineVulkan::DoFlush() {
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
@ -857,6 +859,7 @@ void DrawEngineVulkan::DoFlush() {
// Undo the strip optimization, not supported by the SW code yet.
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
_dbg_assert_(prim != GE_PRIM_INVALID);
u16 *inds = decIndex_;
SoftwareTransformResult result{};
@ -1007,14 +1010,16 @@ void DrawEngineVulkan::DoFlush() {
}
gpuStats.numFlushes++;
gpuStats.numDrawCalls += numDrawCalls_;
gpuStats.numDrawCalls += numDrawInds_;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
@ -1030,8 +1035,10 @@ void DrawEngineVulkan::DoFlush() {
void DrawEngineVulkan::ResetAfterDraw() {
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls_ = 0;
decodeCounter_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
gstate_c.vertexFullAlpha = true;
}

View file

@ -170,13 +170,13 @@ public:
// So that this can be inlined
void Flush() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
DoFlush();
}
void FinishDeferred() {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
// Decode any pending vertices. And also flush while we're at it, for simplicity.
// It might be possible to only decode like in the other backends, but meh, it can't matter.
@ -185,7 +185,7 @@ public:
}
void DispatchFlush() override {
if (!numDrawCalls_)
if (!numDrawVerts_)
return;
Flush();
}

View file

@ -291,6 +291,8 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
desc->geometryShaderSource = gs->GetShaderString(SHADER_STRING_SOURCE_CODE);
}
_dbg_assert_(key.topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
_dbg_assert_(key.topology != VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
desc->topology = (VkPrimitiveTopology)key.topology;
int vertexStride = 0;