From 3ccb01b09cff4d32f6ae4d5234c47c53e8b43f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 22 Dec 2024 10:02:29 +0100 Subject: [PATCH] Depth Raster: Add more stats, discard triangles with < 10 px area. Still good results. --- GPU/Common/DepthRaster.cpp | 33 +++++++++++++++++++++++---------- GPU/GPU.h | 8 ++++++-- GPU/GPUCommonHW.cpp | 6 ++++-- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp index eb4485a75f..f9e3f3b165 100644 --- a/GPU/Common/DepthRaster.cpp +++ b/GPU/Common/DepthRaster.cpp @@ -81,10 +81,19 @@ struct Edge { } }; +enum class TriangleResult { + OK, + NoPixels, + Backface, + TooSmall, +}; + +constexpr int MIN_TRI_AREA = 10; + // Adapted from Intel's depth rasterizer example. // Started with the scalar version, will SIMD-ify later. // x1/y1 etc are the scissor rect. -bool DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const float *tz, ZCompareMode compareMode) { +TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const float *tz, ZCompareMode compareMode) { int tileStartX = x1; int tileEndX = x2; @@ -110,13 +119,16 @@ bool DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY); if (maxX == minX || maxY == minY) { // No pixels, or outside screen. - return false; + return TriangleResult::NoPixels; } // TODO: Cull really small triangles here - we can increase the threshold a bit probably. int triArea = (v1y - v2y) * v0x + (v2x - v1x) * v0y + (v1x * v2y - v2x * v1y); if (triArea <= 0) { - return false; + return TriangleResult::Backface; + } + if (triArea < MIN_TRI_AREA) { + return TriangleResult::TooSmall; } float oneOverTriArea = 1.0f / (float)triArea; @@ -179,7 +191,7 @@ bool DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, } } } - return true; + return TriangleResult::OK; } void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID) { @@ -448,14 +460,15 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr break; case GE_PRIM_TRIANGLES: { - int culled = 0; + int stats[4]{}; for (int i = 0; i < count; i += 3) { - if (!DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp)) { - culled++; - } + TriangleResult result = DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp); + stats[(int)result]++; } - gpuStats.numDepthRasterCulls += culled; - gpuStats.numDepthRasterPrims += count / 3; + gpuStats.numDepthRasterBackface += stats[(int)TriangleResult::Backface]; + gpuStats.numDepthRasterNoPixels += stats[(int)TriangleResult::NoPixels]; + gpuStats.numDepthRasterTooSmall += stats[(int)TriangleResult::TooSmall]; + gpuStats.numDepthRasterPrims += stats[(int)TriangleResult::OK]; break; } default: diff --git a/GPU/GPU.h b/GPU/GPU.h index ef83235775..9b98e86344 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -110,7 +110,9 @@ struct GPUStatistics { msProcessingDisplayLists = 0; msRasterizingDepth = 0.0f; numDepthRasterPrims = 0; - numDepthRasterCulls = 0; + numDepthRasterBackface = 0; + numDepthRasterNoPixels = 0; + numDepthRasterTooSmall = 0; vertexGPUCycles = 0; otherGPUCycles = 0; } @@ -154,7 +156,9 @@ struct GPUStatistics { int vertexGPUCycles; int otherGPUCycles; int numDepthRasterPrims; - int numDepthRasterCulls; + int numDepthRasterBackface; + int numDepthRasterNoPixels; + int numDepthRasterTooSmall; // Flip count. Doesn't really belong here. int numFlips; }; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index d8db488251..552a38179c 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -1776,7 +1776,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { "replacer: tracks %d references, %d unique textures\n" "Cpy: depth %d, color %d, reint %d, blend %d, self %d\n" "GPU cycles: %d (%0.1f per vertex)\n" - "Depth raster: %0.2f ms, %d prim, %d prims culled\n%s", + "Depth raster: %0.2f ms, %d prim, %d nopix, %d small, %d backface\n%s", gpuStats.msProcessingDisplayLists * 1000.0f, gpuStats.numDrawSyncs, gpuStats.numListSyncs, @@ -1815,7 +1815,9 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { vertexAverageCycles, gpuStats.msRasterizingDepth * 1000.0, gpuStats.numDepthRasterPrims, - gpuStats.numDepthRasterCulls, + gpuStats.numDepthRasterNoPixels, + gpuStats.numDepthRasterTooSmall, + gpuStats.numDepthRasterBackface, debugRecording_ ? "(debug-recording)" : "" ); }