Remove some state from IndexGenerator, fix bugs. Mostly works except vertex cache.

This commit is contained in:
Henrik Rydgård 2023-10-02 11:25:27 +02:00
parent 9b411af1f5
commit 92ffef2626
10 changed files with 167 additions and 171 deletions

View file

@ -72,29 +72,6 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
return dec;
}
void DrawEngineCommon::DecodeVerts(u8 *dest) {
int decodeVertsCounter = decodeVertsCounter_;
for (; decodeVertsCounter < numDrawVerts_; decodeVertsCounter++) {
DecodeVertsStep(dest, decodeVertsCounter, decodedVerts_, &drawVerts_[decodeVertsCounter].uvScale);
}
decodeVertsCounter_ = decodeVertsCounter;
}
void DrawEngineCommon::DecodeInds() {
int decodeIndsCounter = decodeIndsCounter_;
for (; decodeIndsCounter < numDrawInds_; decodeIndsCounter++) {
DecodeIndsStep(decodeIndsCounter);
}
decodeIndsCounter_ = decodeIndsCounter;
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0, true);
}
}
std::vector<std::string> DrawEngineCommon::DebugGetVertexLoaderIDs() {
std::vector<std::string> ids;
decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) {
@ -596,45 +573,6 @@ void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
gstate_c.Dirty(DIRTY_SHADERBLEND);
}
void DrawEngineCommon::DecodeVertsStep(u8 *dest, int i, int &decodedVerts, const UVScale *uvScale) {
PROFILE_THIS_SCOPE("vertdec");
const DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound;
int indexUpperBound = dv.indexUpperBound;
// Decode the verts (and at the same time apply morphing/skinning). Simple.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dv.verts, uvScale, dv.indexLowerBound, dv.indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
}
void DrawEngineCommon::DecodeIndsStep(int i) {
const DeferredInds &di = drawInds_[i];
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != di.cullMode) {
clockwise = false;
}
// We've already collapsed subsequent draws with the same vertex pointer, so no tricky logic here anymore.
// 2. Loop through the drawcalls, translating indices as we go.
switch (di.indexType) {
case GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT:
indexGen.AddPrim(di.prim, di.vertexCount, clockwise);
break;
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u8 *)di.inds, di.indexOffset, clockwise);
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u16_le *)di.inds, di.indexOffset, clockwise);
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u32_le *)di.inds, di.indexOffset, clockwise);
break;
}
// 4. Advance indexgen vertex counter.
indexGen.Advance(di.vertexCount);
}
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
// Switch to u32 units, and round up to avoid unaligned accesses.
// Probably doesn't matter if we skip the first few bytes in some cases.
@ -672,7 +610,9 @@ u32 DrawEngineCommon::ComputeMiniHash() {
}
for (int i = 0; i < numDrawInds_; i += step) {
const DeferredInds &di = drawInds_[i];
fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
if (di.inds) {
fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
}
}
return fullhash;
@ -715,7 +655,6 @@ int DrawEngineCommon::ComputeNumVertsToDecode() const {
uint64_t DrawEngineCommon::ComputeHash() {
uint64_t fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;
const int indexSize = IndexSize(dec_->VertexType());
// TODO: Add some caps both for numDrawCalls_ and num verts to check?
// It is really very expensive to check all the vertex data so often.
@ -727,8 +666,11 @@ uint64_t DrawEngineCommon::ComputeHash() {
for (int i = 0; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
if (di.indexType != 0) {
int indexSize = IndexSize(di.indexType << GE_VTYPE_IDX_SHIFT);
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
}
}
// this looks utterly broken??
@ -738,9 +680,11 @@ uint64_t DrawEngineCommon::ComputeHash() {
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_VERTS || numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush();
}
_dbg_assert_(numDrawVerts_ < MAX_DEFERRED_DRAW_VERTS);
_dbg_assert_(numDrawInds_ < MAX_DEFERRED_DRAW_INDS);
// This isn't exactly right, if we flushed, since prims can straddle previous calls.
// But it generally works for common usage.
@ -772,11 +716,15 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
di.prim = prim;
di.cullMode = cullMode;
di.indexOffset = 0;
di.vertexCount = vertexCount;
di.vertDecodeIndex = numDrawVerts_;
_dbg_assert_(numDrawVerts_ <= MAX_DEFERRED_DRAW_VERTS);
_dbg_assert_(numDrawInds_ <= MAX_DEFERRED_DRAW_INDS);
if (inds && numDrawVerts_ > decodeVertsCounter_ && drawVerts_[numDrawVerts_ - 1].verts == verts && !applySkin) {
// Same vertex pointer as a previous un-decoded draw call - let's just extend the decode!
di.vertDecodeIndex = numDrawVerts_ - 1;
DeferredVerts &dv = drawVerts_[numDrawVerts_ - 1];
u16 lb;
u16 ub;
@ -785,8 +733,6 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
dv.indexLowerBound = lb;
if (ub > dv.indexUpperBound)
dv.indexUpperBound = ub;
di.indexOffset = indexOffset_;
// indexOffset_ += vertexCount;
} else {
// Record a new draw, and a new index gen.
DeferredVerts &dv = drawVerts_[numDrawVerts_++];
@ -799,14 +745,12 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
dv.indexLowerBound = 0;
dv.indexUpperBound = vertexCount - 1;
}
indexOffset_ = 0; // vertexCount;
}
vertexCountInDrawCalls_ += vertexCount;
if (applySkin) {
DecodeVertsStep(decoded_, decodeVertsCounter_, decodedVerts_, &drawVerts_[numDrawVerts_ - 1].uvScale);
DecodeIndsStep(decodeIndsCounter_);
DecodeVerts(decoded_);
}
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
@ -816,6 +760,60 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}
}
void DrawEngineCommon::DecodeVerts(u8 *dest) {
int i = decodeVertsCounter_;
int stride = (int)dec_->GetDecVtxFmt().stride;
for (; i < numDrawVerts_; i++) {
DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound;
drawVertexOffsets_[i] = decodedVerts_ - indexLowerBound;
int indexUpperBound = dv.indexUpperBound;
// Decode the verts (and at the same time apply morphing/skinning). Simple.
dec_->DecodeVerts(dest + decodedVerts_ * stride, dv.verts, &dv.uvScale, indexLowerBound, indexUpperBound);
decodedVerts_ += indexUpperBound - indexLowerBound + 1;
}
decodeVertsCounter_ = i;
}
void DrawEngineCommon::DecodeInds() {
int i = decodeIndsCounter_;
for (; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
int indexOffset = drawVertexOffsets_[di.vertDecodeIndex];
bool clockwise = true;
if (gstate.isCullEnabled() && gstate.getCullMode() != di.cullMode) {
clockwise = false;
}
// We've already collapsed subsequent draws with the same vertex pointer, so no tricky logic here anymore.
// 2. Loop through the drawcalls, translating indices as we go.
switch (di.indexType) {
case GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT:
indexGen.AddPrim(di.prim, di.vertexCount, indexOffset, clockwise);
break;
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u8 *)di.inds, indexOffset, clockwise);
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u16_le *)di.inds, indexOffset, clockwise);
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
indexGen.TranslatePrim(di.prim, di.vertexCount, (const u32_le *)di.inds, indexOffset, clockwise);
break;
}
}
decodeIndsCounter_ = i;
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0, 0, true);
}
}
bool DrawEngineCommon::CanUseHardwareTransform(int prim) {
if (!useHWTransform_)
return false;

View file

@ -144,6 +144,10 @@ protected:
void DecodeVerts(u8 *dest);
void DecodeInds();
int MaxIndex() const {
return decodedVerts_;
}
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
@ -151,10 +155,6 @@ protected:
u32 ComputeMiniHash();
uint64_t ComputeHash();
// Vertex decoding
void DecodeVertsStep(u8 *dest, int i, int &decodedVerts, const UVScale *uvScale);
void DecodeIndsStep(int i);
int ComputeNumVertsToDecode() const;
void ApplyFramebufferRead(FBOTexState *fboTexState);
@ -224,15 +224,17 @@ protected:
struct DeferredInds {
const void *inds;
u32 vertexCount;
u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset.
u8 indexType;
s8 prim;
u8 cullMode;
u16 indexOffset;
};
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_CALLS];
DeferredInds drawInds_[MAX_DEFERRED_DRAW_CALLS];
enum { MAX_DEFERRED_DRAW_VERTS = 128 }; // If you change this to more than 256, change type of DeferredInds::vertDecodeIndex.
enum { MAX_DEFERRED_DRAW_INDS = 512 }; // Monster Hunter spams indexed calls that we end up merging.
DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_VERTS];
uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];
DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];
int numDrawVerts_ = 0;
int numDrawInds_ = 0;
@ -242,8 +244,6 @@ protected:
int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;
int indexOffset_ = 0;
// Vertex collector state
IndexGenerator indexGen;
int decodedVerts_ = 0;

View file

@ -50,44 +50,40 @@ void IndexGenerator::Setup(u16 *inds) {
Reset();
}
void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) {
void IndexGenerator::AddPrim(int prim, int vertexCount, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: AddPoints(vertexCount); break;
case GE_PRIM_LINES: AddLineList(vertexCount); break;
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break;
case GE_PRIM_TRIANGLES: AddList(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break;
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same
case GE_PRIM_POINTS: AddPoints(vertexCount, indexOffset); break;
case GE_PRIM_LINES: AddLineList(vertexCount, indexOffset); break;
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount, indexOffset); break;
case GE_PRIM_TRIANGLES: AddList(vertexCount, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount, indexOffset); break; // Same
}
}
void IndexGenerator::AddPoints(int numVerts) {
void IndexGenerator::AddPoints(int numVerts, int indexOffset) {
u16 *outInds = inds_;
const int startIndex = index_;
for (int i = 0; i < numVerts; i++)
*outInds++ = startIndex + i;
*outInds++ = indexOffset + i;
inds_ = outInds;
// ignore overflow verts
index_ += numVerts;
count_ += numVerts;
prim_ = GE_PRIM_POINTS;
seenPrims_ |= 1 << GE_PRIM_POINTS;
}
void IndexGenerator::AddList(int numVerts, bool clockwise) {
void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {
u16 *outInds = inds_;
const int startIndex = index_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numVerts; i += 3) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + v1;
*outInds++ = indexOffset + i + v2;
}
inds_ = outInds;
// ignore overflow verts
index_ += numVerts;
count_ += numVerts;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
@ -119,7 +115,7 @@ alignas(16) static const uint16_t offsets_counter_clockwise[24] = {
7, (u16)(7 + 1), (u16)(7 + 2),
};
void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
int numTris = numVerts - 2;
#ifdef _M_SSE
// In an SSE2 register we can fit 8 16-bit integers.
@ -130,7 +126,7 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
// We allow ourselves to write some extra indices to avoid the fallback loop.
// That's alright as we're appending to a buffer - they will get overwritten anyway.
int numChunks = (numTris + 7) >> 3;
__m128i ibase8 = _mm_set1_epi16(index_);
__m128i ibase8 = _mm_set1_epi16(indexOffset);
const __m128i *offsets = (const __m128i *)(clockwise ? offsets_clockwise : offsets_counter_clockwise);
__m128i *dst = (__m128i *)inds_;
__m128i offsets0 = _mm_add_epi16(ibase8, _mm_load_si128(offsets));
@ -158,7 +154,7 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
// wind doesn't need to be updated, an even number of triangles have been drawn.
#elif PPSSPP_ARCH(ARM_NEON)
int numChunks = (numTris + 7) >> 3;
uint16x8_t ibase8 = vdupq_n_u16(index_);
uint16x8_t ibase8 = vdupq_n_u16(indexOffset);
const u16 *offsets = clockwise ? offsets_clockwise : offsets_counter_clockwise;
u16 *dst = inds_;
uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets));
@ -185,7 +181,7 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
#else
// Slow fallback loop.
int wind = clockwise ? 1 : 2;
int ibase = index_;
int ibase = indexOffset;
size_t numPairs = numTris / 2;
u16 *outInds = inds_;
while (numPairs > 0) {
@ -207,7 +203,6 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
inds_ = outInds;
#endif
index_ += numVerts;
if (numTris > 0)
count_ += numTris * 3;
// This is so we can detect one single strip by just looking at seenPrims_.
@ -222,19 +217,17 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
}
}
void IndexGenerator::AddFan(int numVerts, bool clockwise) {
void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) {
const int numTris = numVerts - 2;
u16 *outInds = inds_;
const int startIndex = index_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numTris; i++) {
*outInds++ = startIndex;
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
*outInds++ = indexOffset;
*outInds++ = indexOffset + i + v1;
*outInds++ = indexOffset + i + v2;
}
inds_ = outInds;
index_ += numVerts;
count_ += numTris * 3;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN;
@ -245,46 +238,40 @@ void IndexGenerator::AddFan(int numVerts, bool clockwise) {
}
//Lines
void IndexGenerator::AddLineList(int numVerts) {
void IndexGenerator::AddLineList(int numVerts, int indexOffset) {
u16 *outInds = inds_;
const int startIndex = index_;
for (int i = 0; i < numVerts; i += 2) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
index_ += numVerts;
count_ += numVerts;
prim_ = GE_PRIM_LINES;
seenPrims_ |= 1 << prim_;
}
void IndexGenerator::AddLineStrip(int numVerts) {
void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) {
const int numLines = numVerts - 1;
u16 *outInds = inds_;
const int startIndex = index_;
for (int i = 0; i < numLines; i++) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
index_ += numVerts;
count_ += numLines * 2;
prim_ = GE_PRIM_LINES;
seenPrims_ |= 1 << GE_PRIM_LINE_STRIP;
}
void IndexGenerator::AddRectangles(int numVerts) {
void IndexGenerator::AddRectangles(int numVerts, int indexOffset) {
u16 *outInds = inds_;
const int startIndex = index_;
//rectangles always need 2 vertices, disregard the last one if there's an odd number
numVerts = numVerts & ~1;
for (int i = 0; i < numVerts; i += 2) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
index_ += numVerts;
count_ += numVerts;
prim_ = GE_PRIM_RECTANGLES;
seenPrims_ |= 1 << GE_PRIM_RECTANGLES;
@ -292,7 +279,6 @@ void IndexGenerator::AddRectangles(int numVerts) {
template <class ITypeLE, int flag>
void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
for (int i = 0; i < numInds; i++)
*outInds++ = indexOffset + inds[i];
@ -304,7 +290,6 @@ void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int index
template <class ITypeLE, int flag>
void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
numInds = numInds & ~1;
for (int i = 0; i < numInds; i += 2) {
@ -319,7 +304,6 @@ void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int ind
template <class ITypeLE, int flag>
void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
int numLines = numInds - 1;
u16 *outInds = inds_;
for (int i = 0; i < numLines; i++) {
@ -334,7 +318,6 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in
template <class ITypeLE, int flag>
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
indexOffset = index_ - indexOffset;
// We only bother doing this minor optimization in triangle list, since it's by far the most
// common operation that can benefit.
if (sizeof(ITypeLE) == sizeof(inds_[0]) && indexOffset == 0 && clockwise) {
@ -347,6 +330,7 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
numInds = numTris * 3;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
// TODO: This can actually be SIMD-d, although will need complex shuffles if clockwise.
for (int i = 0; i < numInds; i += 3) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + v1];
@ -362,7 +346,6 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
template <class ITypeLE, int flag>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
int wind = clockwise ? 1 : 2;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
for (int i = 0; i < numTris; i++) {
@ -380,7 +363,6 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO
template <class ITypeLE, int flag>
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
if (numInds <= 0) return;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
const int v1 = clockwise ? 1 : 2;
@ -398,7 +380,6 @@ void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOff
template <class ITypeLE, int flag>
inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
//rectangles always need 2 vertices, disregard the last one if there's an odd number
numInds = numInds & ~1;

View file

@ -28,7 +28,6 @@ public:
void Reset() {
prim_ = GE_PRIM_INVALID;
count_ = 0;
index_ = 0;
seenPrims_ = 0;
pureCount_ = 0;
this->inds_ = indsBase_;
@ -57,19 +56,12 @@ public:
}
}
void AddPrim(int prim, int vertexCount, bool clockwise);
void AddPrim(int prim, int vertexCount, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise);
void Advance(int numVerts) {
index_ += numVerts;
}
void SetIndex(int ind) { index_ = ind; }
int MaxIndex() const { return index_; } // Really NextIndex rather than MaxIndex, it's one more than the highest index generated
int VertexCount() const { return count_; }
bool Empty() const { return index_ == 0; }
int SeenPrims() const { return seenPrims_; }
int PureCount() const { return pureCount_; }
bool SeenOnlyPurePrims() const {
@ -81,16 +73,16 @@ public:
private:
// Points (why index these? code simplicity)
void AddPoints(int numVerts);
void AddPoints(int numVerts, int indexOffset);
// Triangles
void AddList(int numVerts, bool clockwise);
void AddStrip(int numVerts, bool clockwise);
void AddFan(int numVerts, bool clockwise);
void AddList(int numVerts, int indexOffset, bool clockwise);
void AddStrip(int numVerts, int indexOffset, bool clockwise);
void AddFan(int numVerts, int indexOffset, bool clockwise);
// Lines
void AddLineList(int numVerts);
void AddLineStrip(int numVerts);
void AddLineList(int numVerts, int indexOffset);
void AddLineStrip(int numVerts, int indexOffset);
// Rectangles
void AddRectangles(int numVerts);
void AddRectangles(int numVerts, int indexOffset);
// These translate already indexed lists
template <class ITypeLE, int flag>
@ -118,7 +110,6 @@ private:
u16 *indsBase_;
u16 *inds_;
int index_;
int count_;
int pureCount_;
GEPrimitiveType prim_;

View file

@ -1293,6 +1293,9 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
}
void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const UVScale *uvScaleOffset, int indexLowerBound, int indexUpperBound) const {
// A single 0 is acceptable for point lists.
_dbg_assert_(indexLowerBound <= indexUpperBound);
// Decode the vertices within the found bounds, once each
// decoded_ and ptr_ are used in the steps, so can't be turned into locals for speed.
const u8 *startPtr = (const u8*)verts + indexLowerBound * size;

View file

@ -384,9 +384,10 @@ void DrawEngineD3D11::DoFlush() {
vai->status = VertexArrayInfoD3D11::VAI_HASHING;
vai->drawsUntilNextFullHash = 0;
DecodeVerts(decoded_); // writes to indexGen
DecodeInds();
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->maxIndex = MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0;
goto rotateVBO;
}
@ -409,6 +410,7 @@ void DrawEngineD3D11::DoFlush() {
if (newMiniHash != vai->minihash || newHash != vai->hash) {
MarkUnreliable(vai);
DecodeVerts(decoded_);
DecodeInds();
goto rotateVBO;
}
if (vai->numVerts > 64) {
@ -428,15 +430,17 @@ void DrawEngineD3D11::DoFlush() {
if (newMiniHash != vai->minihash) {
MarkUnreliable(vai);
DecodeVerts(decoded_);
DecodeInds();
goto rotateVBO;
}
}
if (vai->vbo == 0) {
DecodeVerts(decoded_);
DecodeInds();
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->maxIndex = MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0;
useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
if (!useElements && indexGen.PureCount()) {
@ -446,7 +450,7 @@ void DrawEngineD3D11::DoFlush() {
_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
// TODO: Combine these two into one buffer?
u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
u32 size = dec_->GetDecVtxFmt().stride * MaxIndex();
D3D11_BUFFER_DESC desc{ size, D3D11_USAGE_IMMUTABLE, D3D11_BIND_VERTEX_BUFFER, 0 };
D3D11_SUBRESOURCE_DATA data{ decoded_ };
ASSERT_SUCCESS(device_->CreateBuffer(&desc, &data, &vai->vbo));
@ -500,6 +504,7 @@ void DrawEngineD3D11::DoFlush() {
vai->numFrames++;
}
DecodeVerts(decoded_);
DecodeInds();
goto rotateVBO;
}
}
@ -507,11 +512,12 @@ void DrawEngineD3D11::DoFlush() {
vai->lastFrame = gpuStats.numFlips;
} else {
DecodeVerts(decoded_);
DecodeInds();
rotateVBO:
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
vertexCount = indexGen.VertexCount();
maxIndex = indexGen.MaxIndex();
maxIndex = MaxIndex();
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
@ -584,6 +590,7 @@ rotateVBO:
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
@ -622,7 +629,7 @@ rotateVBO:
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();
int maxIndex = MaxIndex();
SoftwareTransform swTransform(params);
const Lin::Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);

View file

@ -362,9 +362,10 @@ void DrawEngineDX9::DoFlush() {
vai->status = VertexArrayInfoDX9::VAI_HASHING;
vai->drawsUntilNextFullHash = 0;
DecodeVerts(decoded_); // writes to indexGen
DecodeInds();
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->maxIndex = MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0;
goto rotateVBO;
@ -388,6 +389,7 @@ void DrawEngineDX9::DoFlush() {
if (newMiniHash != vai->minihash || newHash != vai->hash) {
MarkUnreliable(vai);
DecodeVerts(decoded_);
DecodeInds();
goto rotateVBO;
}
if (vai->numVerts > 64) {
@ -407,15 +409,17 @@ void DrawEngineDX9::DoFlush() {
if (newMiniHash != vai->minihash) {
MarkUnreliable(vai);
DecodeVerts(decoded_);
DecodeInds();
goto rotateVBO;
}
}
if (vai->vbo == 0) {
DecodeVerts(decoded_);
DecodeInds();
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->maxIndex = MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0;
useElements = !indexGen.SeenOnlyPurePrims();
if (!useElements && indexGen.PureCount()) {
@ -425,7 +429,7 @@ void DrawEngineDX9::DoFlush() {
_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
void * pVb;
u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
u32 size = dec_->GetDecVtxFmt().stride * MaxIndex();
device_->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vai->vbo, NULL);
vai->vbo->Lock(0, size, &pVb, 0);
memcpy(pVb, decoded_, size);
@ -482,6 +486,7 @@ void DrawEngineDX9::DoFlush() {
vai->numFrames++;
}
DecodeVerts(decoded_);
DecodeInds();
goto rotateVBO;
}
}
@ -489,17 +494,20 @@ void DrawEngineDX9::DoFlush() {
vai->lastFrame = gpuStats.numFlips;
} else {
DecodeVerts(decoded_);
DecodeInds();
rotateVBO:
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims();
vertexCount = indexGen.VertexCount();
maxIndex = indexGen.MaxIndex();
maxIndex = MaxIndex();
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
prim = indexGen.Prim();
}
_dbg_assert_((int)prim > 0);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
@ -544,6 +552,7 @@ rotateVBO:
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
@ -582,7 +591,7 @@ rotateVBO:
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();
int maxIndex = MaxIndex();
SoftwareTransform swTransform(params);
// Half pixel offset hack.

View file

@ -286,9 +286,9 @@ void DrawEngineGLES::DoFlush() {
// Figure out how much pushbuffer space we need to allocate.
int vertsToDecode = ComputeNumVertsToDecode();
u8 *dest = (u8 *)frameData.pushVertex->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vertexBuffer, &vertexBufferOffset);
// Indices are decoded in here.
DecodeVerts(dest);
}
DecodeInds();
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
@ -345,6 +345,7 @@ void DrawEngineGLES::DoFlush() {
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -383,7 +384,7 @@ void DrawEngineGLES::DoFlush() {
UpdateCachedViewportState(vpAndScissor_);
}
int maxIndex = indexGen.MaxIndex();
int maxIndex = MaxIndex();
int vertexCount = indexGen.VertexCount();
// TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts.

View file

@ -566,10 +566,11 @@ bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfoVulkan::VAI_HASHING;
vai->drawsUntilNextFullHash = 0;
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf); // writes to indexGen
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
DecodeInds();
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->maxIndex = MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
return true;
}
@ -593,6 +594,7 @@ bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim
if (newMiniHash != vai->minihash || newHash != vai->hash) {
MarkUnreliable(vai);
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
DecodeInds();
return true;
}
if (vai->numVerts > 64) {
@ -612,6 +614,7 @@ bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim
if (newMiniHash != vai->minihash) {
MarkUnreliable(vai);
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
DecodeInds();
return true;
}
}
@ -619,9 +622,10 @@ bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim
if (!vai->vb) {
// Directly push to the vertex cache.
DecodeVertsToPushBuffer(vertexCache_, &vai->vbOffset, &vai->vb);
DecodeInds();
_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
vai->numVerts = indexGen.VertexCount();
vai->maxIndex = indexGen.MaxIndex();
vai->maxIndex = MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
if (forceIndexed) {
vai->prim = indexGen.GeneralPrim();
@ -684,6 +688,7 @@ bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim
vai->numFrames++;
}
DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
DecodeInds();
return true;
}
default:
@ -889,7 +894,7 @@ void DrawEngineVulkan::DoFlush() {
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();
int maxIndex = MaxIndex();
SoftwareTransform swTransform(params);
const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
@ -1037,6 +1042,7 @@ void DrawEngineVulkan::ResetAfterDraw() {
decodedVerts_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;

View file

@ -170,13 +170,13 @@ public:
// So that this can be inlined
void Flush() {
if (!numDrawVerts_)
if (!numDrawInds_)
return;
DoFlush();
}
void FinishDeferred() {
if (!numDrawVerts_)
if (!numDrawInds_)
return;
// Decode any pending vertices. And also flush while we're at it, for simplicity.
// It might be possible to only decode like in the other backends, but meh, it can't matter.
@ -185,9 +185,9 @@ public:
}
void DispatchFlush() override {
if (!numDrawVerts_)
if (!numDrawInds_)
return;
Flush();
DoFlush();
}
VkPipelineLayout GetPipelineLayout() const {