mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Add some depth raster stats
This commit is contained in:
parent
d53635e096
commit
0629a98f97
5 changed files with 39 additions and 7 deletions
|
@ -84,7 +84,7 @@ struct Edge {
|
|||
// Adapted from Intel's depth rasterizer example.
|
||||
// Started with the scalar version, will SIMD-ify later.
|
||||
// x1/y1 etc are the scissor rect.
|
||||
void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) {
|
||||
bool DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) {
|
||||
int tileStartX = x1;
|
||||
int tileEndX = x2;
|
||||
|
||||
|
@ -114,13 +114,13 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2,
|
|||
int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY);
|
||||
if (maxX == minX || maxY == minY) {
|
||||
// No pixels, or outside screen.
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: Cull really small triangles here.
|
||||
int triArea = (v1y - v2y) * v0x + (v2x - v1x) * v0y + (v1x * v2y - v2x * v1y);
|
||||
if (triArea <= 0) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
float oneOverTriArea = 1.0f / (float)triArea;
|
||||
|
@ -183,6 +183,7 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2,
|
|||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID) {
|
||||
|
@ -446,12 +447,20 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr
|
|||
// We remove the subpixel information here.
|
||||
DepthRasterRect(depth, depthStride, tx[i], ty[i], tx[i + 1], ty[i + 1], z, comp);
|
||||
}
|
||||
gpuStats.numDepthRasterPrims += count / 2;
|
||||
break;
|
||||
case GE_PRIM_TRIANGLES:
|
||||
{
|
||||
int culled = 0;
|
||||
for (int i = 0; i < count; i += 3) {
|
||||
DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp);
|
||||
if (!DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp)) {
|
||||
culled++;
|
||||
}
|
||||
}
|
||||
gpuStats.numDepthRasterCulls += culled;
|
||||
gpuStats.numDepthRasterPrims += count / 3;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
_dbg_assert_(false);
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "Common/LogReporting.h"
|
||||
#include "Common/Math/SIMDHeaders.h"
|
||||
#include "Common/Math/lin/matrix4x4.h"
|
||||
#include "Common/TimeUtil.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/Config.h"
|
||||
#include "GPU/Common/DrawEngineCommon.h"
|
||||
|
@ -914,6 +915,7 @@ inline void ComputeFinalProjMatrix(float *worldviewproj) {
|
|||
}
|
||||
|
||||
void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder *dec, uint32_t vertTypeID, int vertexCount) {
|
||||
|
||||
switch (prim) {
|
||||
case GE_PRIM_INVALID:
|
||||
case GE_PRIM_KEEP_PREVIOUS:
|
||||
|
@ -929,6 +931,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
|
|||
return;
|
||||
}
|
||||
|
||||
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
|
||||
|
||||
float worldviewproj[16];
|
||||
ComputeFinalProjMatrix(worldviewproj);
|
||||
|
||||
|
@ -972,6 +976,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
|
|||
}
|
||||
|
||||
void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *inVerts, int numDecoded, VertexDecoder *dec, int vertexCount) {
|
||||
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
|
||||
|
||||
switch (prim) {
|
||||
case GE_PRIM_INVALID:
|
||||
case GE_PRIM_KEEP_PREVIOUS:
|
||||
|
@ -996,6 +1002,9 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i
|
|||
DepthRasterConvertTransformed(tx, ty, tz, depthTransformed_, decIndex_, vertexCount);
|
||||
outVertCount = vertexCount;
|
||||
} else {
|
||||
if (dec->VertexType() & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)) {
|
||||
return;
|
||||
}
|
||||
float worldviewproj[16];
|
||||
ComputeFinalProjMatrix(worldviewproj);
|
||||
TransformPredecodedForDepthRaster(depthTransformed_, worldviewproj, decoded_, dec, numDecoded);
|
||||
|
|
|
@ -1866,6 +1866,8 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
|
|||
char tag[128];
|
||||
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
|
||||
|
||||
gpuStats.numFBOsCreated++;
|
||||
|
||||
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
|
||||
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
|
||||
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len);
|
||||
|
|
|
@ -91,6 +91,7 @@ struct GPUStatistics {
|
|||
numPlaneUpdates = 0;
|
||||
numTexturesDecoded = 0;
|
||||
numFramebufferEvaluations = 0;
|
||||
numFBOsCreated = 0;
|
||||
numBlockingReadbacks = 0;
|
||||
numReadbacks = 0;
|
||||
numUploads = 0;
|
||||
|
@ -107,6 +108,9 @@ struct GPUStatistics {
|
|||
numCachedReplacedTextures = 0;
|
||||
numClutTextures = 0;
|
||||
msProcessingDisplayLists = 0;
|
||||
msRasterizingDepth = 0.0f;
|
||||
numDepthRasterPrims = 0;
|
||||
numDepthRasterCulls = 0;
|
||||
vertexGPUCycles = 0;
|
||||
otherGPUCycles = 0;
|
||||
}
|
||||
|
@ -129,6 +133,7 @@ struct GPUStatistics {
|
|||
int numTextureDataBytesHashed;
|
||||
int numTexturesDecoded;
|
||||
int numFramebufferEvaluations;
|
||||
int numFBOsCreated;
|
||||
int numBlockingReadbacks;
|
||||
int numReadbacks;
|
||||
int numUploads;
|
||||
|
@ -145,9 +150,11 @@ struct GPUStatistics {
|
|||
int numCachedReplacedTextures;
|
||||
int numClutTextures;
|
||||
double msProcessingDisplayLists;
|
||||
double msRasterizingDepth;
|
||||
int vertexGPUCycles;
|
||||
int otherGPUCycles;
|
||||
|
||||
int numDepthRasterPrims;
|
||||
int numDepthRasterCulls;
|
||||
// Flip count. Doesn't really belong here.
|
||||
int numFlips;
|
||||
};
|
||||
|
|
|
@ -1769,13 +1769,14 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
|
||||
"Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
|
||||
"Vertices: %d dec: %d drawn: %d\n"
|
||||
"FBOs active: %d (evaluations: %d)\n"
|
||||
"FBOs active: %d (evaluations: %d, created %d)\n"
|
||||
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB, clut %d\n"
|
||||
"readbacks %d (%d non-block), upload %d (cached %d), depal %d\n"
|
||||
"block transfers: %d\n"
|
||||
"replacer: tracks %d references, %d unique textures\n"
|
||||
"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
|
||||
"GPU cycles: %d (%0.1f per vertex)\n%s",
|
||||
"GPU cycles: %d (%0.1f per vertex)\n"
|
||||
"Depth raster: %0.2f ms, %d prim, %d prims culled\n%s",
|
||||
gpuStats.msProcessingDisplayLists * 1000.0f,
|
||||
gpuStats.numDrawSyncs,
|
||||
gpuStats.numListSyncs,
|
||||
|
@ -1791,6 +1792,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
gpuStats.numUncachedVertsDrawn,
|
||||
(int)framebufferManager_->NumVFBs(),
|
||||
gpuStats.numFramebufferEvaluations,
|
||||
gpuStats.numFBOsCreated,
|
||||
(int)textureCache_->NumLoadedTextures(),
|
||||
gpuStats.numTexturesDecoded,
|
||||
gpuStats.numTextureInvalidations,
|
||||
|
@ -1811,6 +1813,9 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
gpuStats.numCopiesForSelfTex,
|
||||
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
|
||||
vertexAverageCycles,
|
||||
gpuStats.msRasterizingDepth * 1000.0,
|
||||
gpuStats.numDepthRasterPrims,
|
||||
gpuStats.numDepthRasterCulls,
|
||||
debugRecording_ ? "(debug-recording)" : ""
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue