From 0629a98f97dc80cbc3ec988800c118337ca8ed5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 21 Dec 2024 22:30:36 +0100 Subject: [PATCH] Add some depth raster stats --- GPU/Common/DepthRaster.cpp | 17 +++++++++++++---- GPU/Common/DrawEngineCommon.cpp | 9 +++++++++ GPU/Common/FramebufferManagerCommon.cpp | 2 ++ GPU/GPU.h | 9 ++++++++- GPU/GPUCommonHW.cpp | 9 +++++++-- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp index efa0c913b0..875083f0bb 100644 --- a/GPU/Common/DepthRaster.cpp +++ b/GPU/Common/DepthRaster.cpp @@ -84,7 +84,7 @@ struct Edge { // Adapted from Intel's depth rasterizer example. // Started with the scalar version, will SIMD-ify later. // x1/y1 etc are the scissor rect. -void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) { +bool DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) { int tileStartX = x1; int tileEndX = x2; @@ -114,13 +114,13 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY); if (maxX == minX || maxY == minY) { // No pixels, or outside screen. - return; + return false; } // TODO: Cull really small triangles here. int triArea = (v1y - v2y) * v0x + (v2x - v1x) * v0y + (v1x * v2y - v2x * v1y); if (triArea <= 0) { - return; + return false; } float oneOverTriArea = 1.0f / (float)triArea; @@ -183,6 +183,7 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, } } } + return true; } void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID) { @@ -446,12 +447,20 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr // We remove the subpixel information here. DepthRasterRect(depth, depthStride, tx[i], ty[i], tx[i + 1], ty[i + 1], z, comp); } + gpuStats.numDepthRasterPrims += count / 2; break; case GE_PRIM_TRIANGLES: + { + int culled = 0; for (int i = 0; i < count; i += 3) { - DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp); + if (!DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp)) { + culled++; + } } + gpuStats.numDepthRasterCulls += culled; + gpuStats.numDepthRasterPrims += count / 3; break; + } default: _dbg_assert_(false); } diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 088efd052e..7aaac261ad 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -23,6 +23,7 @@ #include "Common/LogReporting.h" #include "Common/Math/SIMDHeaders.h" #include "Common/Math/lin/matrix4x4.h" +#include "Common/TimeUtil.h" #include "Core/System.h" #include "Core/Config.h" #include "GPU/Common/DrawEngineCommon.h" @@ -914,6 +915,7 @@ inline void ComputeFinalProjMatrix(float *worldviewproj) { } void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder *dec, uint32_t vertTypeID, int vertexCount) { + switch (prim) { case GE_PRIM_INVALID: case GE_PRIM_KEEP_PREVIOUS: @@ -929,6 +931,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder return; } + TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats); + float worldviewproj[16]; ComputeFinalProjMatrix(worldviewproj); @@ -972,6 +976,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder } void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *inVerts, int numDecoded, VertexDecoder *dec, int vertexCount) { + TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats); + switch (prim) { case GE_PRIM_INVALID: case GE_PRIM_KEEP_PREVIOUS: @@ -996,6 +1002,9 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i DepthRasterConvertTransformed(tx, ty, tz, depthTransformed_, decIndex_, vertexCount); outVertCount = vertexCount; } else { + if (dec->VertexType() & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)) { + return; + } float worldviewproj[16]; ComputeFinalProjMatrix(worldviewproj); TransformPredecodedForDepthRaster(depthTransformed_, worldviewproj, decoded_, dec, numDecoded); diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e919ba6113..0ecfbdfd96 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1866,6 +1866,8 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, char tag[128]; size_t len = FormatFramebufferName(vfb, tag, sizeof(tag)); + gpuStats.numFBOsCreated++; + vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag }); if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) { NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len); diff --git a/GPU/GPU.h b/GPU/GPU.h index 4c86c7e815..ef83235775 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -91,6 +91,7 @@ struct GPUStatistics { numPlaneUpdates = 0; numTexturesDecoded = 0; numFramebufferEvaluations = 0; + numFBOsCreated = 0; numBlockingReadbacks = 0; numReadbacks = 0; numUploads = 0; @@ -107,6 +108,9 @@ struct GPUStatistics { numCachedReplacedTextures = 0; numClutTextures = 0; msProcessingDisplayLists = 0; + msRasterizingDepth = 0.0f; + numDepthRasterPrims = 0; + numDepthRasterCulls = 0; vertexGPUCycles = 0; otherGPUCycles = 0; } @@ -129,6 +133,7 @@ struct GPUStatistics { int numTextureDataBytesHashed; int numTexturesDecoded; int numFramebufferEvaluations; + int numFBOsCreated; int numBlockingReadbacks; int numReadbacks; int numUploads; @@ -145,9 +150,11 @@ struct GPUStatistics { int numCachedReplacedTextures; int numClutTextures; double msProcessingDisplayLists; + double msRasterizingDepth; int vertexGPUCycles; int otherGPUCycles; - + int numDepthRasterPrims; + int numDepthRasterCulls; // Flip count. Doesn't really belong here. int numFlips; }; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 9b5389c875..d8db488251 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -1769,13 +1769,14 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { "DL processing time: %0.2f ms, %d drawsync, %d listsync\n" "Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n" "Vertices: %d dec: %d drawn: %d\n" - "FBOs active: %d (evaluations: %d)\n" + "FBOs active: %d (evaluations: %d, created %d)\n" "Textures: %d, dec: %d, invalidated: %d, hashed: %d kB, clut %d\n" "readbacks %d (%d non-block), upload %d (cached %d), depal %d\n" "block transfers: %d\n" "replacer: tracks %d references, %d unique textures\n" "Cpy: depth %d, color %d, reint %d, blend %d, self %d\n" - "GPU cycles: %d (%0.1f per vertex)\n%s", + "GPU cycles: %d (%0.1f per vertex)\n" + "Depth raster: %0.2f ms, %d prim, %d prims culled\n%s", gpuStats.msProcessingDisplayLists * 1000.0f, gpuStats.numDrawSyncs, gpuStats.numListSyncs, @@ -1791,6 +1792,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { gpuStats.numUncachedVertsDrawn, (int)framebufferManager_->NumVFBs(), gpuStats.numFramebufferEvaluations, + gpuStats.numFBOsCreated, (int)textureCache_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, @@ -1811,6 +1813,9 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { gpuStats.numCopiesForSelfTex, gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles, vertexAverageCycles, + gpuStats.msRasterizingDepth * 1000.0, + gpuStats.numDepthRasterPrims, + gpuStats.numDepthRasterCulls, debugRecording_ ? "(debug-recording)" : "" ); }