diff --git a/Common/UI/View.cpp b/Common/UI/View.cpp index b60f6ac88d..37f8e9ab02 100644 --- a/Common/UI/View.cpp +++ b/Common/UI/View.cpp @@ -620,7 +620,6 @@ CollapsibleHeader::CollapsibleHeader(bool *toggle, const std::string &text, Layo void CollapsibleHeader::Draw(UIContext &dc) { Style style = dc.theme->itemStyle; - style.background.color = 0; if (HasFocus()) style = dc.theme->itemFocusedStyle; if (down_) style = dc.theme->itemDownStyle; if (!IsEnabled()) style = dc.theme->itemDisabledStyle; diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 8927a1c2a0..2ce5daded7 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -670,6 +670,31 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t * return cmd - start; } +void DrawEngineCommon::SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) { + if (!indexGen.PrimCompatible(prevPrim_, prim)) { + DispatchFlush(); + } + + // This isn't exactly right, if we flushed, since prims can straddle previous calls. + // But it generally works for common usage. + if (prim == GE_PRIM_KEEP_PREVIOUS) { + // Has to be set to something, let's assume POINTS (0) if no previous. + if (prevPrim_ == GE_PRIM_INVALID) + prevPrim_ = GE_PRIM_POINTS; + prim = prevPrim_; + } else { + prevPrim_ = prim; + } + + // If vtype has changed, set up the vertex decoder. + if (vertTypeID != lastVType_ || !dec_) { + dec_ = GetVertexDecoder(vertTypeID); + lastVType_ = vertTypeID; + } + + *bytesRead = vertexCount * dec_->VertexSize(); +} + // vertTypeID is the vertex type but with the UVGen mode smashed into the top bits. 
bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_VERTS || numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index e9e8870ef3..ad274bb692 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -113,6 +113,8 @@ public: int ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle); bool SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead); + void SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead); + template<class Surface> void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope); void ClearSplineBezierWeights(); diff --git a/GPU/GPU.h b/GPU/GPU.h index 7d4d4d1c0a..f2edbdc1f6 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -76,6 +76,7 @@ struct GPUStatistics { void ResetFrame() { numDrawCalls = 0; numVertexDecodes = 0; + numCulledDraws = 0; numDrawSyncs = 0; numListSyncs = 0; numVertsSubmitted = 0; @@ -111,6 +112,7 @@ struct GPUStatistics { // Per frame statistics int numDrawCalls; int numVertexDecodes; + int numCulledDraws; int numDrawSyncs; int numListSyncs; int numFlushes; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index f961880d8a..e064fe7454 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -989,9 +989,36 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { int cullMode = gstate.getCullMode(); uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning); - if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, 
vertTypeID, true, &bytesRead)) { + +#define MAX_CULL_CHECK_COUNT 6 + +#define PASSES_CULLING ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHT_MASK | GE_VTYPE_IDX_MASK)) || count > MAX_CULL_CHECK_COUNT) + + // Draws with any of the PASSES_CULLING vertex-type bits set, or with more than MAX_CULL_CHECK_COUNT vertices, bypass the check below; everything else gets a frustum culling test. + bool passCulling = PASSES_CULLING; + if (!passCulling) { + // Do software culling. + if (drawEngineCommon_->TestBoundingBox(verts, inds, count, vertexType)) { + passCulling = true; + } else { + gpuStats.numCulledDraws++; + } + } + + // If the first one in a batch passes, let's assume the whole batch passes. + // Cuts down on checking, while not losing that much efficiency. + bool onePassed = false; + if (passCulling) { + if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, true, &bytesRead)) { + canExtend = false; + } + onePassed = true; + } else { + // Still need to advance bytesRead. + drawEngineCommon_->SkipPrim(prim, count, vertTypeID, &bytesRead); canExtend = false; } + // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). // Some games rely on this, they don't bother reloading VADDR and IADDR. // The VADDR/IADDR registers are NOT updated. @@ -1027,7 +1054,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { bool clockwise = !gstate.isCullEnabled() || gstate.getCullMode() == cullMode; if (canExtend) { // Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices - // are consecutive in memory. + // are consecutive in memory. We also ignore culling here. _dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE); int commandsExecuted = drawEngineCommon_->ExtendNonIndexedPrim(src, stall, vertTypeID, clockwise, &bytesRead, isTriangle); if (!commandsExecuted) { @@ -1047,7 +1074,25 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { // We can extend again after submitting a normal draw. 
canExtend = isTriangle; } - if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, clockwise, &bytesRead)) { + + bool passCulling = onePassed || PASSES_CULLING; + if (!passCulling) { + // Do software culling. + if (drawEngineCommon_->TestBoundingBox(verts, inds, count, vertexType)) { + passCulling = true; + } else { + gpuStats.numCulledDraws++; + } + } + if (passCulling) { + if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, clockwise, &bytesRead)) { + canExtend = false; + } + // As soon as one passes, assume we don't need to check the rest of this batch. + onePassed = true; + } else { + // Still need to advance bytesRead. + drawEngineCommon_->SkipPrim(newPrim, count, vertTypeID, &bytesRead); canExtend = false; } AdvanceVerts(vertexType, count, bytesRead); @@ -1691,7 +1736,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; return snprintf(buffer, size, "DL processing time: %0.2f ms, %d drawsync, %d listsync\n" - "Draw: %d (%d dec), flushes %d, clears %d, bbox jumps %d (%d updates)\n" + "Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n" "Vertices: %d drawn: %d\n" "FBOs active: %d (evaluations: %d)\n" "Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n" @@ -1705,6 +1750,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { gpuStats.numListSyncs, gpuStats.numDrawCalls, gpuStats.numVertexDecodes, + gpuStats.numCulledDraws, gpuStats.numFlushes, gpuStats.numClears, gpuStats.numBBOXJumps,