mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
It's running.
This commit is contained in:
parent
6a2e5dd7f7
commit
9b411af1f5
11 changed files with 209 additions and 231 deletions
|
@ -72,43 +72,20 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
|
|||
return dec;
|
||||
}
|
||||
|
||||
int DrawEngineCommon::ComputeNumVertsToDecode() const {
|
||||
int vertsToDecode = 0;
|
||||
int numDrawCalls = numDrawCalls_;
|
||||
if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
for (int i = 0; i < numDrawCalls; i++) {
|
||||
const DeferredDrawCall &dc = drawCalls_[i];
|
||||
vertsToDecode += dc.vertexCount;
|
||||
}
|
||||
} else {
|
||||
// TODO: Share this computation with DecodeVertsStep?
|
||||
for (int i = 0; i < numDrawCalls; i++) {
|
||||
const DeferredDrawCall &dc = drawCalls_[i];
|
||||
int lastMatch = i;
|
||||
const int total = numDrawCalls;
|
||||
int indexLowerBound = dc.indexLowerBound;
|
||||
int indexUpperBound = dc.indexUpperBound;
|
||||
for (int j = i + 1; j < total; ++j) {
|
||||
if (drawCalls_[j].verts != dc.verts)
|
||||
break;
|
||||
|
||||
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
|
||||
indexUpperBound = std::max(indexUpperBound, (int)drawCalls_[j].indexUpperBound);
|
||||
lastMatch = j;
|
||||
}
|
||||
vertsToDecode += indexUpperBound - indexLowerBound + 1;
|
||||
i = lastMatch;
|
||||
}
|
||||
void DrawEngineCommon::DecodeVerts(u8 *dest) {
|
||||
int decodeVertsCounter = decodeVertsCounter_;
|
||||
for (; decodeVertsCounter < numDrawVerts_; decodeVertsCounter++) {
|
||||
DecodeVertsStep(dest, decodeVertsCounter, decodedVerts_, &drawVerts_[decodeVertsCounter].uvScale);
|
||||
}
|
||||
return vertsToDecode;
|
||||
decodeVertsCounter_ = decodeVertsCounter;
|
||||
}
|
||||
|
||||
void DrawEngineCommon::DecodeVerts(u8 *dest) {
|
||||
int decodeCounter = decodeCounter_;
|
||||
for (; decodeCounter < numDrawCalls_; decodeCounter++) {
|
||||
DecodeVertsStep(dest, decodeCounter, decodedVerts_, &drawCalls_[decodeCounter].uvScale); // NOTE! DecodeVertsStep can modify the decodeCounter parameter!
|
||||
void DrawEngineCommon::DecodeInds() {
|
||||
int decodeIndsCounter = decodeIndsCounter_;
|
||||
for (; decodeIndsCounter < numDrawInds_; decodeIndsCounter++) {
|
||||
DecodeIndsStep(decodeIndsCounter);
|
||||
}
|
||||
decodeCounter_ = decodeCounter;
|
||||
decodeIndsCounter_ = decodeIndsCounter;
|
||||
|
||||
// Sanity check
|
||||
if (indexGen.Prim() < 0) {
|
||||
|
@ -619,92 +596,43 @@ void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
|
|||
gstate_c.Dirty(DIRTY_SHADERBLEND);
|
||||
}
|
||||
|
||||
void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts, const UVScale *uvScale) {
|
||||
void DrawEngineCommon::DecodeVertsStep(u8 *dest, int i, int &decodedVerts, const UVScale *uvScale) {
|
||||
PROFILE_THIS_SCOPE("vertdec");
|
||||
|
||||
const DeferredDrawCall &dc = drawCalls_[i];
|
||||
const DeferredVerts &dv = drawVerts_[i];
|
||||
|
||||
indexGen.SetIndex(decodedVerts);
|
||||
int indexLowerBound = dc.indexLowerBound;
|
||||
int indexUpperBound = dc.indexUpperBound;
|
||||
int indexLowerBound = dv.indexLowerBound;
|
||||
int indexUpperBound = dv.indexUpperBound;
|
||||
|
||||
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
// Decode the verts (and at the same time apply morphing/skinning). Simple.
|
||||
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
|
||||
dc.verts, uvScale, indexLowerBound, indexUpperBound);
|
||||
decodedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
|
||||
bool clockwise = true;
|
||||
if (gstate.isCullEnabled() && gstate.getCullMode() != dc.cullMode) {
|
||||
clockwise = false;
|
||||
}
|
||||
indexGen.AddPrim(dc.prim, dc.vertexCount, clockwise);
|
||||
} else {
|
||||
// It's fairly common that games issue long sequences of PRIM calls, with differing
|
||||
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
|
||||
// these as much as possible, so we make sure here to combine as many as possible
|
||||
// into one nice big drawcall, sharing data.
|
||||
// Decode the verts (and at the same time apply morphing/skinning). Simple.
|
||||
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dv.verts, uvScale, dv.indexLowerBound, dv.indexUpperBound);
|
||||
decodedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
}
|
||||
|
||||
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
|
||||
// Expand the lower and upper bounds as we go.
|
||||
int lastMatch = i;
|
||||
const int total = numDrawCalls_;
|
||||
for (int j = i + 1; j < total; ++j) {
|
||||
if (drawCalls_[j].verts != dc.verts)
|
||||
break;
|
||||
// TODO: What if UV scale/offset changes between drawcalls here?
|
||||
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
|
||||
indexUpperBound = std::max(indexUpperBound, (int)drawCalls_[j].indexUpperBound);
|
||||
lastMatch = j;
|
||||
}
|
||||
|
||||
// 2. Loop through the drawcalls, translating indices as we go.
|
||||
switch (dc.indexType) {
|
||||
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
for (int j = i; j <= lastMatch; j++) {
|
||||
bool clockwise = true;
|
||||
if (gstate.isCullEnabled() && gstate.getCullMode() != drawCalls_[j].cullMode) {
|
||||
clockwise = false;
|
||||
}
|
||||
indexGen.TranslatePrim(drawCalls_[j].prim, drawCalls_[j].vertexCount, (const u8 *)drawCalls_[j].inds, indexLowerBound, clockwise);
|
||||
}
|
||||
break;
|
||||
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
for (int j = i; j <= lastMatch; j++) {
|
||||
bool clockwise = true;
|
||||
if (gstate.isCullEnabled() && gstate.getCullMode() != drawCalls_[j].cullMode) {
|
||||
clockwise = false;
|
||||
}
|
||||
indexGen.TranslatePrim(drawCalls_[j].prim, drawCalls_[j].vertexCount, (const u16_le *)drawCalls_[j].inds, indexLowerBound, clockwise);
|
||||
}
|
||||
break;
|
||||
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
for (int j = i; j <= lastMatch; j++) {
|
||||
bool clockwise = true;
|
||||
if (gstate.isCullEnabled() && gstate.getCullMode() != drawCalls_[j].cullMode) {
|
||||
clockwise = false;
|
||||
}
|
||||
indexGen.TranslatePrim(drawCalls_[j].prim, drawCalls_[j].vertexCount, (const u32_le *)drawCalls_[j].inds, indexLowerBound, clockwise);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
const int vertexCount = indexUpperBound - indexLowerBound + 1;
|
||||
|
||||
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
|
||||
if (decodedVerts + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 3. Decode that range of vertex data.
|
||||
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
|
||||
dc.verts, uvScale, indexLowerBound, indexUpperBound);
|
||||
decodedVerts += vertexCount;
|
||||
|
||||
// 4. Advance indexgen vertex counter.
|
||||
indexGen.Advance(vertexCount);
|
||||
i = lastMatch;
|
||||
void DrawEngineCommon::DecodeIndsStep(int i) {
|
||||
const DeferredInds &di = drawInds_[i];
|
||||
bool clockwise = true;
|
||||
if (gstate.isCullEnabled() && gstate.getCullMode() != di.cullMode) {
|
||||
clockwise = false;
|
||||
}
|
||||
// We've already collapsed subsequent draws with the same vertex pointer, so no tricky logic here anymore.
|
||||
// 2. Loop through the drawcalls, translating indices as we go.
|
||||
switch (di.indexType) {
|
||||
case GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT:
|
||||
indexGen.AddPrim(di.prim, di.vertexCount, clockwise);
|
||||
break;
|
||||
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u8 *)di.inds, di.indexOffset, clockwise);
|
||||
break;
|
||||
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u16_le *)di.inds, di.indexOffset, clockwise);
|
||||
break;
|
||||
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u32_le *)di.inds, di.indexOffset, clockwise);
|
||||
break;
|
||||
}
|
||||
// 4. Advance indexgen vertex counter.
|
||||
indexGen.Advance(di.vertexCount);
|
||||
}
|
||||
|
||||
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
|
||||
|
@ -731,27 +659,59 @@ u32 DrawEngineCommon::ComputeMiniHash() {
|
|||
const int indexSize = IndexSize(dec_->VertexType());
|
||||
|
||||
int step;
|
||||
if (numDrawCalls_ < 3) {
|
||||
if (numDrawVerts_ < 3) {
|
||||
step = 1;
|
||||
} else if (numDrawCalls_ < 8) {
|
||||
} else if (numDrawVerts_ < 8) {
|
||||
step = 4;
|
||||
} else {
|
||||
step = numDrawCalls_ / 8;
|
||||
step = numDrawVerts_ / 8;
|
||||
}
|
||||
for (int i = 0; i < numDrawCalls_; i += step) {
|
||||
const DeferredDrawCall &dc = drawCalls_[i];
|
||||
if (!dc.inds) {
|
||||
fullhash += ComputeMiniHashRange(dc.verts, vertexSize * dc.vertexCount);
|
||||
} else {
|
||||
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
|
||||
fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
|
||||
fullhash += ComputeMiniHashRange(dc.inds, indexSize * dc.vertexCount);
|
||||
}
|
||||
for (int i = 0; i < numDrawVerts_; i += step) {
|
||||
const DeferredVerts &dc = drawVerts_[i];
|
||||
fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * dc.indexLowerBound, vertexSize * (dc.indexUpperBound - dc.indexLowerBound));
|
||||
}
|
||||
for (int i = 0; i < numDrawInds_; i += step) {
|
||||
const DeferredInds &di = drawInds_[i];
|
||||
fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
|
||||
}
|
||||
|
||||
return fullhash;
|
||||
}
|
||||
|
||||
// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
|
||||
inline uint32_t lowbias32_r(uint32_t x) {
|
||||
x ^= x >> 16;
|
||||
x *= 0x43021123U;
|
||||
x ^= x >> 15 ^ x >> 30;
|
||||
x *= 0x1d69e2a5U;
|
||||
x ^= x >> 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
|
||||
uint32_t dcid = 0;
|
||||
for (int i = 0; i < numDrawVerts_; i++) {
|
||||
u32 dhash = dcid;
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawVerts_[i].verts, 13);
|
||||
dhash = __rotl(dhash ^ (u32)drawInds_[i].vertexCount, 11);
|
||||
dcid = lowbias32_r(dhash ^ (u32)drawInds_[i].prim);
|
||||
}
|
||||
for (int j = 0; j < numDrawInds_; j++) {
|
||||
u32 dhash = dcid;
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawInds_[j].inds, 19);
|
||||
dcid = lowbias32_r(__rotl(dhash ^ (u32)drawInds_[j].indexType, 7));
|
||||
}
|
||||
return dcid;
|
||||
}
|
||||
|
||||
int DrawEngineCommon::ComputeNumVertsToDecode() const {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < numDrawVerts_; i++) {
|
||||
sum += drawVerts_[i].indexUpperBound + 1 - drawVerts_[i].indexLowerBound;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
uint64_t DrawEngineCommon::ComputeHash() {
|
||||
uint64_t fullhash = 0;
|
||||
const int vertexSize = dec_->GetDecVtxFmt().stride;
|
||||
|
@ -759,39 +719,26 @@ uint64_t DrawEngineCommon::ComputeHash() {
|
|||
|
||||
// TODO: Add some caps both for numDrawCalls_ and num verts to check?
|
||||
// It is really very expensive to check all the vertex data so often.
|
||||
for (int i = 0; i < numDrawCalls_; i++) {
|
||||
const DeferredDrawCall &dc = drawCalls_[i];
|
||||
if (!dc.inds) {
|
||||
fullhash += XXH3_64bits((const char *)dc.verts, vertexSize * dc.vertexCount);
|
||||
} else {
|
||||
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
|
||||
int j = i + 1;
|
||||
int lastMatch = i;
|
||||
while (j < numDrawCalls_) {
|
||||
if (drawCalls_[j].verts != dc.verts)
|
||||
break;
|
||||
indexLowerBound = std::min(indexLowerBound, (int)dc.indexLowerBound);
|
||||
indexUpperBound = std::max(indexUpperBound, (int)dc.indexUpperBound);
|
||||
lastMatch = j;
|
||||
j++;
|
||||
}
|
||||
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
|
||||
// we do when drawing.
|
||||
fullhash += XXH3_64bits((const char *)dc.verts + vertexSize * indexLowerBound,
|
||||
vertexSize * (indexUpperBound - indexLowerBound));
|
||||
// Hm, we will miss some indices when combining above, but meh, it should be fine.
|
||||
fullhash += XXH3_64bits((const char *)dc.inds, indexSize * dc.vertexCount);
|
||||
i = lastMatch;
|
||||
}
|
||||
for (int i = 0; i < numDrawVerts_; i++) {
|
||||
const DeferredVerts &dv = drawVerts_[i];
|
||||
int indexLowerBound = dv.indexLowerBound, indexUpperBound = dv.indexUpperBound;
|
||||
fullhash += XXH3_64bits((const char *)dv.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
|
||||
}
|
||||
|
||||
fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
|
||||
for (int i = 0; i < numDrawInds_; i++) {
|
||||
const DeferredInds &di = drawInds_[i];
|
||||
// Hm, we will miss some indices when combining above, but meh, it should be fine.
|
||||
fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
|
||||
}
|
||||
|
||||
// this looks utterly broken??
|
||||
// fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
|
||||
return fullhash;
|
||||
}
|
||||
|
||||
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
|
||||
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
DispatchFlush();
|
||||
}
|
||||
|
||||
|
@ -818,27 +765,48 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
|
|||
if (vertexCount < 3 && ((vertexCount < 2 && prim > 0) || (prim > GE_PRIM_LINE_STRIP && prim != GE_PRIM_RECTANGLES)))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls_[numDrawCalls_];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertexCount = vertexCount;
|
||||
dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.cullMode = cullMode;
|
||||
dc.uvScale = gstate_c.uv;
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
bool applySkin = (vertTypeID & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
|
||||
|
||||
DeferredInds &di = drawInds_[numDrawInds_++];
|
||||
di.inds = inds;
|
||||
di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
di.prim = prim;
|
||||
di.cullMode = cullMode;
|
||||
di.indexOffset = 0;
|
||||
di.vertexCount = vertexCount;
|
||||
|
||||
if (inds && numDrawVerts_ > decodeVertsCounter_ && drawVerts_[numDrawVerts_ - 1].verts == verts && !applySkin) {
|
||||
// Same vertex pointer as a previous un-decoded draw call - let's just extend the decode!
|
||||
DeferredVerts &dv = drawVerts_[numDrawVerts_ - 1];
|
||||
u16 lb;
|
||||
u16 ub;
|
||||
GetIndexBounds(inds, vertexCount, vertTypeID, &lb, &ub);
|
||||
if (lb < dv.indexLowerBound)
|
||||
dv.indexLowerBound = lb;
|
||||
if (ub > dv.indexUpperBound)
|
||||
dv.indexUpperBound = ub;
|
||||
di.indexOffset = indexOffset_;
|
||||
// indexOffset_ += vertexCount;
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
// Record a new draw, and a new index gen.
|
||||
DeferredVerts &dv = drawVerts_[numDrawVerts_++];
|
||||
dv.verts = verts;
|
||||
dv.vertexCount = vertexCount;
|
||||
dv.uvScale = gstate_c.uv;
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertTypeID, &dv.indexLowerBound, &dv.indexUpperBound);
|
||||
} else {
|
||||
dv.indexLowerBound = 0;
|
||||
dv.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
indexOffset_ = 0; // vertexCount;
|
||||
}
|
||||
|
||||
numDrawCalls_++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if ((vertTypeID & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode) {
|
||||
DecodeVertsStep(decoded_, decodeCounter_, decodedVerts_, &dc.uvScale);
|
||||
decodeCounter_++;
|
||||
if (applySkin) {
|
||||
DecodeVertsStep(decoded_, decodeVertsCounter_, decodedVerts_, &drawVerts_[numDrawVerts_ - 1].uvScale);
|
||||
DecodeIndsStep(decodeIndsCounter_);
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
|
@ -861,29 +829,6 @@ bool DrawEngineCommon::CanUseHardwareTessellation(GEPatchPrimType prim) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
|
||||
inline uint32_t lowbias32_r(uint32_t x) {
|
||||
x ^= x >> 16;
|
||||
x *= 0x43021123U;
|
||||
x ^= x >> 15 ^ x >> 30;
|
||||
x *= 0x1d69e2a5U;
|
||||
x ^= x >> 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
|
||||
uint32_t dcid = 0;
|
||||
for (int i = 0; i < numDrawCalls_; i++) {
|
||||
u32 dhash = dcid;
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].verts, 13);
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].inds, 19);
|
||||
dhash = __rotl(dhash ^ (u32)drawCalls_[i].indexType, 7);
|
||||
dhash = __rotl(dhash ^ (u32)drawCalls_[i].vertexCount, 11);
|
||||
dcid = lowbias32_r(dhash ^ (u32)drawCalls_[i].prim);
|
||||
}
|
||||
return dcid;
|
||||
}
|
||||
|
||||
void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType) {
|
||||
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
|
||||
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
|
||||
|
|
|
@ -130,7 +130,7 @@ public:
|
|||
return false;
|
||||
}
|
||||
int GetNumDrawCalls() const {
|
||||
return numDrawCalls_;
|
||||
return numDrawVerts_;
|
||||
}
|
||||
|
||||
VertexDecoder *GetVertexDecoder(u32 vtype);
|
||||
|
@ -141,8 +141,8 @@ protected:
|
|||
virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
|
||||
void UpdatePlanes();
|
||||
|
||||
int ComputeNumVertsToDecode() const;
|
||||
void DecodeVerts(u8 *dest);
|
||||
void DecodeInds();
|
||||
|
||||
// Preprocessing for spline/bezier
|
||||
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
|
||||
|
@ -152,7 +152,10 @@ protected:
|
|||
uint64_t ComputeHash();
|
||||
|
||||
// Vertex decoding
|
||||
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts, const UVScale *uvScale);
|
||||
void DecodeVertsStep(u8 *dest, int i, int &decodedVerts, const UVScale *uvScale);
|
||||
void DecodeIndsStep(int i);
|
||||
|
||||
int ComputeNumVertsToDecode() const;
|
||||
|
||||
void ApplyFramebufferRead(FBOTexState *fboTexState);
|
||||
|
||||
|
@ -210,25 +213,36 @@ protected:
|
|||
TransformedVertex *transformedExpanded_ = nullptr;
|
||||
|
||||
// Defer all vertex decoding to a "Flush" (except when software skinning)
|
||||
struct DeferredDrawCall {
|
||||
struct DeferredVerts {
|
||||
const void *verts;
|
||||
const void *inds;
|
||||
u32 vertexCount;
|
||||
u8 indexType;
|
||||
s8 prim;
|
||||
u8 cullMode;
|
||||
u16 indexLowerBound;
|
||||
u16 indexUpperBound;
|
||||
UVScale uvScale;
|
||||
};
|
||||
|
||||
struct DeferredInds {
|
||||
const void *inds;
|
||||
u32 vertexCount;
|
||||
u8 indexType;
|
||||
s8 prim;
|
||||
u8 cullMode;
|
||||
u16 indexOffset;
|
||||
};
|
||||
|
||||
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
|
||||
DeferredDrawCall drawCalls_[MAX_DEFERRED_DRAW_CALLS];
|
||||
int numDrawCalls_ = 0;
|
||||
DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_CALLS];
|
||||
DeferredInds drawInds_[MAX_DEFERRED_DRAW_CALLS];
|
||||
|
||||
int numDrawVerts_ = 0;
|
||||
int numDrawInds_ = 0;
|
||||
int vertexCountInDrawCalls_ = 0;
|
||||
|
||||
int decimationCounter_ = 0;
|
||||
int decodeCounter_ = 0;
|
||||
int decodeVertsCounter_ = 0;
|
||||
int decodeIndsCounter_ = 0;
|
||||
|
||||
int indexOffset_ = 0;
|
||||
|
||||
// Vertex collector state
|
||||
IndexGenerator indexGen;
|
||||
|
|
|
@ -366,7 +366,7 @@ void DrawEngineD3D11::DoFlush() {
|
|||
|
||||
if (useCache) {
|
||||
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
|
||||
u32 dcid = ComputeDrawcallsHash() ^ gstate.getUVGenMode();
|
||||
|
||||
VertexArrayInfoD3D11 *vai;
|
||||
if (!vai_.Get(dcid, &vai)) {
|
||||
|
@ -719,14 +719,16 @@ rotateVBO:
|
|||
}
|
||||
|
||||
gpuStats.numFlushes++;
|
||||
gpuStats.numDrawCalls += numDrawCalls_;
|
||||
gpuStats.numDrawCalls += numDrawInds_;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
|
||||
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls_ = 0;
|
||||
numDrawVerts_ = 0;
|
||||
numDrawInds_ = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
decodeVertsCounter_ = 0;
|
||||
decodeIndsCounter_ = 0;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
|
||||
|
|
|
@ -138,19 +138,19 @@ public:
|
|||
|
||||
// So that this can be inlined
|
||||
void Flush() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DoFlush();
|
||||
}
|
||||
|
||||
void FinishDeferred() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DecodeVerts(decoded_);
|
||||
}
|
||||
|
||||
void DispatchFlush() override {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
Flush();
|
||||
}
|
||||
|
|
|
@ -345,7 +345,7 @@ void DrawEngineDX9::DoFlush() {
|
|||
|
||||
if (useCache) {
|
||||
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
|
||||
u32 dcid = ComputeDrawcallsHash() ^ gstate.getUVGenMode();
|
||||
VertexArrayInfoDX9 *vai;
|
||||
if (!vai_.Get(dcid, &vai)) {
|
||||
vai = new VertexArrayInfoDX9();
|
||||
|
@ -658,14 +658,18 @@ rotateVBO:
|
|||
}
|
||||
|
||||
gpuStats.numFlushes++;
|
||||
gpuStats.numDrawCalls += numDrawCalls_;
|
||||
gpuStats.numDrawCalls += numDrawInds_;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
|
||||
|
||||
// TODO: The below should be shared.
|
||||
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls_ = 0;
|
||||
numDrawVerts_ = 0;
|
||||
numDrawInds_ = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
decodeVertsCounter_ = 0;
|
||||
decodeIndsCounter_ = 0;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
|
||||
|
|
|
@ -128,19 +128,19 @@ public:
|
|||
|
||||
// So that this can be inlined
|
||||
void Flush() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DoFlush();
|
||||
}
|
||||
|
||||
void FinishDeferred() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DecodeVerts(decoded_);
|
||||
}
|
||||
|
||||
void DispatchFlush() override {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
Flush();
|
||||
}
|
||||
|
|
|
@ -245,9 +245,11 @@ void DrawEngineGLES::DoFlush() {
|
|||
// can't goto bail here, skips too many variable initializations. So let's wipe the most important stuff.
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls_ = 0;
|
||||
numDrawVerts_ = 0;
|
||||
numDrawInds_ = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
decodeVertsCounter_ = 0;
|
||||
decodeIndsCounter_ = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -471,7 +473,7 @@ void DrawEngineGLES::DoFlush() {
|
|||
|
||||
bail:
|
||||
gpuStats.numFlushes++;
|
||||
gpuStats.numDrawCalls += numDrawCalls_;
|
||||
gpuStats.numDrawCalls += numDrawInds_;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
|
||||
|
||||
// TODO: When the next flush has the same vertex format, we can continue with the same offset in the vertex buffer,
|
||||
|
@ -479,9 +481,11 @@ bail:
|
|||
// wanted to avoid rebinding the vertex input every time).
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls_ = 0;
|
||||
numDrawVerts_ = 0;
|
||||
numDrawInds_ = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
decodeVertsCounter_ = 0;
|
||||
decodeIndsCounter_ = 0;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
|
||||
|
|
|
@ -86,19 +86,19 @@ public:
|
|||
|
||||
// So that this can be inlined
|
||||
void Flush() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DoFlush();
|
||||
}
|
||||
|
||||
void FinishDeferred() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DoFlush();
|
||||
}
|
||||
|
||||
void DispatchFlush() override {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
Flush();
|
||||
}
|
||||
|
|
|
@ -748,6 +748,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
// Decode directly into the pushbuffer
|
||||
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
|
||||
}
|
||||
DecodeInds();
|
||||
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
|
||||
}
|
||||
|
||||
|
@ -845,6 +846,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
dec_ = GetVertexDecoder(lastVType_);
|
||||
}
|
||||
DecodeVerts(decoded_);
|
||||
DecodeInds();
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
if (gstate.isModeThrough()) {
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
|
||||
|
@ -857,6 +859,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
// Undo the strip optimization, not supported by the SW code yet.
|
||||
if (prim == GE_PRIM_TRIANGLE_STRIP)
|
||||
prim = GE_PRIM_TRIANGLES;
|
||||
_dbg_assert_(prim != GE_PRIM_INVALID);
|
||||
|
||||
u16 *inds = decIndex_;
|
||||
SoftwareTransformResult result{};
|
||||
|
@ -1007,14 +1010,16 @@ void DrawEngineVulkan::DoFlush() {
|
|||
}
|
||||
|
||||
gpuStats.numFlushes++;
|
||||
gpuStats.numDrawCalls += numDrawCalls_;
|
||||
gpuStats.numDrawCalls += numDrawInds_;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
|
||||
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls_ = 0;
|
||||
numDrawVerts_ = 0;
|
||||
numDrawInds_ = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
decodeIndsCounter_ = 0;
|
||||
decodeVertsCounter_ = 0;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
|
||||
|
@ -1030,8 +1035,10 @@ void DrawEngineVulkan::DoFlush() {
|
|||
void DrawEngineVulkan::ResetAfterDraw() {
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
numDrawVerts_ = 0;
|
||||
numDrawInds_ = 0;
|
||||
decodeIndsCounter_ = 0;
|
||||
decodeVertsCounter_ = 0;
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
}
|
||||
|
|
|
@ -170,13 +170,13 @@ public:
|
|||
|
||||
// So that this can be inlined
|
||||
void Flush() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
DoFlush();
|
||||
}
|
||||
|
||||
void FinishDeferred() {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
// Decode any pending vertices. And also flush while we're at it, for simplicity.
|
||||
// It might be possible to only decode like in the other backends, but meh, it can't matter.
|
||||
|
@ -185,7 +185,7 @@ public:
|
|||
}
|
||||
|
||||
void DispatchFlush() override {
|
||||
if (!numDrawCalls_)
|
||||
if (!numDrawVerts_)
|
||||
return;
|
||||
Flush();
|
||||
}
|
||||
|
|
|
@ -291,6 +291,8 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
|
|||
desc->geometryShaderSource = gs->GetShaderString(SHADER_STRING_SOURCE_CODE);
|
||||
}
|
||||
|
||||
_dbg_assert_(key.topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
|
||||
_dbg_assert_(key.topology != VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
|
||||
desc->topology = (VkPrimitiveTopology)key.topology;
|
||||
|
||||
int vertexStride = 0;
|
||||
|
|
Loading…
Add table
Reference in a new issue