mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Frustum-cull small draws
Some games do a poor job of culling stuff, and some transparent sprites can be very expensive if they cause a copy. Skipping them if they are outside the viewport makes sense in that case. One example is the flame sprites in #17797. Additionally, we should be able to cull through-mode draws easily, but this commit doesn't even try.
This commit is contained in:
parent
3e20fab387
commit
0905b6a5ad
5 changed files with 79 additions and 5 deletions
|
@ -620,7 +620,6 @@ CollapsibleHeader::CollapsibleHeader(bool *toggle, const std::string &text, Layo
|
|||
|
||||
void CollapsibleHeader::Draw(UIContext &dc) {
|
||||
Style style = dc.theme->itemStyle;
|
||||
style.background.color = 0;
|
||||
if (HasFocus()) style = dc.theme->itemFocusedStyle;
|
||||
if (down_) style = dc.theme->itemDownStyle;
|
||||
if (!IsEnabled()) style = dc.theme->itemDisabledStyle;
|
||||
|
|
|
@ -670,6 +670,31 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
|
|||
return cmd - start;
|
||||
}
|
||||
|
||||
// Accounts for a draw without submitting it: updates the tracked previous primitive type and
// the cached vertex decoder, then writes the byte size of the draw's vertices to *bytesRead
// (vertexCount times the decoder's per-vertex size).
void DrawEngineCommon::SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
	// Flush first when the index generator reports the new prim type as incompatible
	// with the previous one.
	const bool compatible = indexGen.PrimCompatible(prevPrim_, prim);
	if (!compatible)
		DispatchFlush();

	// Not exactly right if we flushed, since prims can straddle previous calls,
	// but it generally works for common usage.
	if (prim != GE_PRIM_KEEP_PREVIOUS) {
		prevPrim_ = prim;
	} else if (prevPrim_ == GE_PRIM_INVALID) {
		// KEEP_PREVIOUS with no previous prim: it has to be set to something, so assume POINTS (0).
		prevPrim_ = GE_PRIM_POINTS;
	}

	// Fetch the vertex decoder when none is cached yet or the vertex type has changed.
	const bool decoderStale = !dec_ || lastVType_ != vertTypeID;
	if (decoderStale) {
		dec_ = GetVertexDecoder(vertTypeID);
		lastVType_ = vertTypeID;
	}

	*bytesRead = vertexCount * dec_->VertexSize();
}
|
||||
|
||||
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
|
||||
bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_VERTS || numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
|
|
|
@ -113,6 +113,8 @@ public:
|
|||
|
||||
int ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle);
|
||||
bool SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead);
|
||||
void SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
|
||||
|
||||
template<class Surface>
|
||||
void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);
|
||||
void ClearSplineBezierWeights();
|
||||
|
|
|
@ -76,6 +76,7 @@ struct GPUStatistics {
|
|||
void ResetFrame() {
|
||||
numDrawCalls = 0;
|
||||
numVertexDecodes = 0;
|
||||
numCulledDraws = 0;
|
||||
numDrawSyncs = 0;
|
||||
numListSyncs = 0;
|
||||
numVertsSubmitted = 0;
|
||||
|
@ -111,6 +112,7 @@ struct GPUStatistics {
|
|||
// Per frame statistics
|
||||
int numDrawCalls;
|
||||
int numVertexDecodes;
|
||||
int numCulledDraws;
|
||||
int numDrawSyncs;
|
||||
int numListSyncs;
|
||||
int numFlushes;
|
||||
|
|
|
@ -989,9 +989,36 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
|
|||
int cullMode = gstate.getCullMode();
|
||||
|
||||
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
|
||||
if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, true, &bytesRead)) {
|
||||
|
||||
#define MAX_CULL_CHECK_COUNT 6
|
||||
|
||||
#define PASSES_CULLING ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHT_MASK | GE_VTYPE_IDX_MASK)) || count > MAX_CULL_CHECK_COUNT)
|
||||
|
||||
// If certain conditions are true, do frustum culling.
|
||||
bool passCulling = PASSES_CULLING;
|
||||
if (!passCulling) {
|
||||
// Do software culling.
|
||||
if (drawEngineCommon_->TestBoundingBox(verts, inds, count, vertexType)) {
|
||||
passCulling = true;
|
||||
} else {
|
||||
gpuStats.numCulledDraws++;
|
||||
}
|
||||
}
|
||||
|
||||
// If the first one in a batch passes, let's assume the whole batch passes.
|
||||
// Cuts down on checking, while not losing that much efficiency.
|
||||
bool onePassed = false;
|
||||
if (passCulling) {
|
||||
if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, true, &bytesRead)) {
|
||||
canExtend = false;
|
||||
}
|
||||
onePassed = true;
|
||||
} else {
|
||||
// Still need to advance bytesRead.
|
||||
drawEngineCommon_->SkipPrim(prim, count, vertTypeID, &bytesRead);
|
||||
canExtend = false;
|
||||
}
|
||||
|
||||
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
|
||||
// Some games rely on this, they don't bother reloading VADDR and IADDR.
|
||||
// The VADDR/IADDR registers are NOT updated.
|
||||
|
@ -1027,7 +1054,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
|
|||
bool clockwise = !gstate.isCullEnabled() || gstate.getCullMode() == cullMode;
|
||||
if (canExtend) {
|
||||
// Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices
|
||||
// are consecutive in memory.
|
||||
// are consecutive in memory. We also ignore culling here.
|
||||
_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
|
||||
int commandsExecuted = drawEngineCommon_->ExtendNonIndexedPrim(src, stall, vertTypeID, clockwise, &bytesRead, isTriangle);
|
||||
if (!commandsExecuted) {
|
||||
|
@ -1047,7 +1074,25 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
|
|||
// We can extend again after submitting a normal draw.
|
||||
canExtend = isTriangle;
|
||||
}
|
||||
if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, clockwise, &bytesRead)) {
|
||||
|
||||
bool passCulling = onePassed || PASSES_CULLING;
|
||||
if (!passCulling) {
|
||||
// Do software culling.
|
||||
if (drawEngineCommon_->TestBoundingBox(verts, inds, count, vertexType)) {
|
||||
passCulling = true;
|
||||
} else {
|
||||
gpuStats.numCulledDraws++;
|
||||
}
|
||||
}
|
||||
if (passCulling) {
|
||||
if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, clockwise, &bytesRead)) {
|
||||
canExtend = false;
|
||||
}
|
||||
// As soon as one passes, assume we don't need to check the rest of this batch.
|
||||
onePassed = true;
|
||||
} else {
|
||||
// Still need to advance bytesRead.
|
||||
drawEngineCommon_->SkipPrim(newPrim, count, vertTypeID, &bytesRead);
|
||||
canExtend = false;
|
||||
}
|
||||
AdvanceVerts(vertexType, count, bytesRead);
|
||||
|
@ -1691,7 +1736,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
return snprintf(buffer, size,
|
||||
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
|
||||
"Draw: %d (%d dec), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
|
||||
"Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
|
||||
"Vertices: %d drawn: %d\n"
|
||||
"FBOs active: %d (evaluations: %d)\n"
|
||||
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
|
||||
|
@ -1705,6 +1750,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
gpuStats.numListSyncs,
|
||||
gpuStats.numDrawCalls,
|
||||
gpuStats.numVertexDecodes,
|
||||
gpuStats.numCulledDraws,
|
||||
gpuStats.numFlushes,
|
||||
gpuStats.numClears,
|
||||
gpuStats.numBBOXJumps,
|
||||
|
|
Loading…
Add table
Reference in a new issue