Add some depth raster stats

This commit is contained in:
Henrik Rydgård 2024-12-21 22:30:36 +01:00
parent d53635e096
commit 0629a98f97
5 changed files with 39 additions and 7 deletions

View file

@ -84,7 +84,7 @@ struct Edge {
// Adapted from Intel's depth rasterizer example.
// Started with the scalar version, will SIMD-ify later.
// (x1, y1) and (x2, y2) bound the scissor rect.
void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) {
bool DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) {
int tileStartX = x1;
int tileEndX = x2;
@ -114,13 +114,13 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2,
int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY);
if (maxX == minX || maxY == minY) {
// No pixels, or outside screen.
return;
return false;
}
// TODO: Cull really small triangles here.
int triArea = (v1y - v2y) * v0x + (v2x - v1x) * v0y + (v1x * v2y - v2x * v1y);
if (triArea <= 0) {
return;
return false;
}
float oneOverTriArea = 1.0f / (float)triArea;
@ -183,6 +183,7 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2,
}
}
}
return true;
}
void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID) {
@ -446,12 +447,20 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr
// We remove the subpixel information here.
DepthRasterRect(depth, depthStride, tx[i], ty[i], tx[i + 1], ty[i + 1], z, comp);
}
gpuStats.numDepthRasterPrims += count / 2;
break;
case GE_PRIM_TRIANGLES:
{
int culled = 0;
for (int i = 0; i < count; i += 3) {
DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp);
if (!DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp)) {
culled++;
}
}
gpuStats.numDepthRasterCulls += culled;
gpuStats.numDepthRasterPrims += count / 3;
break;
}
default:
_dbg_assert_(false);
}

View file

@ -23,6 +23,7 @@
#include "Common/LogReporting.h"
#include "Common/Math/SIMDHeaders.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Common/TimeUtil.h"
#include "Core/System.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"
@ -914,6 +915,7 @@ inline void ComputeFinalProjMatrix(float *worldviewproj) {
}
void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder *dec, uint32_t vertTypeID, int vertexCount) {
switch (prim) {
case GE_PRIM_INVALID:
case GE_PRIM_KEEP_PREVIOUS:
@ -929,6 +931,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
return;
}
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
float worldviewproj[16];
ComputeFinalProjMatrix(worldviewproj);
@ -972,6 +976,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
}
void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *inVerts, int numDecoded, VertexDecoder *dec, int vertexCount) {
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
switch (prim) {
case GE_PRIM_INVALID:
case GE_PRIM_KEEP_PREVIOUS:
@ -996,6 +1002,9 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i
DepthRasterConvertTransformed(tx, ty, tz, depthTransformed_, decIndex_, vertexCount);
outVertCount = vertexCount;
} else {
if (dec->VertexType() & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)) {
return;
}
float worldviewproj[16];
ComputeFinalProjMatrix(worldviewproj);
TransformPredecodedForDepthRaster(depthTransformed_, worldviewproj, decoded_, dec, numDecoded);

View file

@ -1866,6 +1866,8 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
char tag[128];
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
gpuStats.numFBOsCreated++;
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len);

View file

@ -91,6 +91,7 @@ struct GPUStatistics {
numPlaneUpdates = 0;
numTexturesDecoded = 0;
numFramebufferEvaluations = 0;
numFBOsCreated = 0;
numBlockingReadbacks = 0;
numReadbacks = 0;
numUploads = 0;
@ -107,6 +108,9 @@ struct GPUStatistics {
numCachedReplacedTextures = 0;
numClutTextures = 0;
msProcessingDisplayLists = 0;
msRasterizingDepth = 0.0f;
numDepthRasterPrims = 0;
numDepthRasterCulls = 0;
vertexGPUCycles = 0;
otherGPUCycles = 0;
}
@ -129,6 +133,7 @@ struct GPUStatistics {
int numTextureDataBytesHashed;
int numTexturesDecoded;
int numFramebufferEvaluations;
int numFBOsCreated;
int numBlockingReadbacks;
int numReadbacks;
int numUploads;
@ -145,9 +150,11 @@ struct GPUStatistics {
int numCachedReplacedTextures;
int numClutTextures;
double msProcessingDisplayLists;
double msRasterizingDepth;
int vertexGPUCycles;
int otherGPUCycles;
int numDepthRasterPrims;
int numDepthRasterCulls;
// Flip count. Doesn't really belong here.
int numFlips;
};

View file

@ -1769,13 +1769,14 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
"Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
"Vertices: %d dec: %d drawn: %d\n"
"FBOs active: %d (evaluations: %d)\n"
"FBOs active: %d (evaluations: %d, created %d)\n"
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB, clut %d\n"
"readbacks %d (%d non-block), upload %d (cached %d), depal %d\n"
"block transfers: %d\n"
"replacer: tracks %d references, %d unique textures\n"
"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
"GPU cycles: %d (%0.1f per vertex)\n%s",
"GPU cycles: %d (%0.1f per vertex)\n"
"Depth raster: %0.2f ms, %d prim, %d prims culled\n%s",
gpuStats.msProcessingDisplayLists * 1000.0f,
gpuStats.numDrawSyncs,
gpuStats.numListSyncs,
@ -1791,6 +1792,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numUncachedVertsDrawn,
(int)framebufferManager_->NumVFBs(),
gpuStats.numFramebufferEvaluations,
gpuStats.numFBOsCreated,
(int)textureCache_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
@ -1811,6 +1813,9 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numCopiesForSelfTex,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles,
gpuStats.msRasterizingDepth * 1000.0,
gpuStats.numDepthRasterPrims,
gpuStats.numDepthRasterCulls,
debugRecording_ ? "(debug-recording)" : ""
);
}