Minor cleanup and preparatory refactoring in the depth raster code

This commit is contained in:
Henrik Rydgård 2024-12-28 23:16:59 +01:00
parent 25d79afd47
commit 4ef3ac474a
3 changed files with 20 additions and 33 deletions

View file

@ -103,25 +103,18 @@ enum class TriangleResult {
TooSmall,
};
constexpr int MIN_TRI_AREA = 10;
constexpr int MIN_TWICE_TRI_AREA = 10;
// Adapted from Intel's depth rasterizer example.
// Started with the scalar version, will SIMD-ify later.
// scissor.x1/y1/x2/y2 define the scissor rect that the triangle's bounding box is clamped to.
template<ZCompareMode compareMode>
TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, DepthScissor scissor, const int *tx, const int *ty, const float *tz) {
const int tileStartX = scissor.x1;
const int tileEndX = scissor.x2;
const int tileStartY = scissor.y1;
const int tileEndY = scissor.y2;
// BEGIN triangle setup. This should be done SIMD, four triangles at a time.
// Due to the many multiplications, we might want to do it in floating point as 32-bit integer muls
// are slow on SSE2.
// NOTE: Triangles are stored in groups of 4.
int v0x = tx[0];
int v0y = ty[0];
int v1x = tx[4];
@ -131,10 +124,10 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, DepthScissor
// use fixed-point only for X and Y. Avoid work for Z and W.
// We use 4x1 tiles for simplicity.
int minX = std::max(std::min(std::min(v0x, v1x), v2x), tileStartX) & ~3;
int maxX = std::min(std::max(std::max(v0x, v1x), v2x) + 3, tileEndX) & ~3;
int minY = std::max(std::min(std::min(v0y, v1y), v2y), tileStartY);
int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY);
int minX = std::max(std::min(std::min(v0x, v1x), v2x), (int)scissor.x1) & ~3;
int maxX = std::min(std::max(std::max(v0x, v1x), v2x) + 3, (int)scissor.x2) & ~3;
int minY = std::max(std::min(std::min(v0y, v1y), v2y), (int)scissor.y1);
int maxY = std::min(std::max(std::max(v0y, v1y), v2y), (int)scissor.y2);
if (maxX == minX || maxY == minY) {
// No pixels, or outside screen.
return TriangleResult::NoPixels;
@ -145,7 +138,7 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, DepthScissor
if (triArea < 0) {
return TriangleResult::Backface;
}
if (triArea < MIN_TRI_AREA) {
if (triArea < MIN_TWICE_TRI_AREA) {
return TriangleResult::TooSmall; // Or zero area.
}

View file

@ -35,7 +35,7 @@ struct DepthDraw {
int cullMode;
DepthScissor scissor;
bool through;
int transformedStartIndex;
int vertexOffset;
int indexOffset;
int vertexCount;
};

View file

@ -803,7 +803,7 @@ void DrawEngineCommon::DecodeVerts(VertexDecoder *dec, u8 *dest) {
int i = decodeVertsCounter_;
int stride = (int)dec->GetDecVtxFmt().stride;
for (; i < numDrawVerts_; i++) {
DeferredVerts &dv = drawVerts_[i];
const DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound;
drawVertexOffsets_[i] = numDecodedVerts_ - indexLowerBound;
@ -978,7 +978,7 @@ static bool CalculateDepthDraw(DepthDraw *draw, GEPrimitiveType prim, int vertex
_dbg_assert_(gstate.isDepthWriteEnabled());
}
draw->transformedStartIndex = 0;
draw->vertexOffset = 0;
draw->indexOffset = 0;
draw->vertexCount = vertexCount;
draw->cullEnabled = gstate.isCullEnabled();
@ -1013,26 +1013,20 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
// Decode.
int numDec = 0;
int numDecoded = 0;
for (int i = 0; i < numDrawVerts_; i++) {
DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound;
drawVertexOffsets_[i] = numDec - indexLowerBound;
int indexUpperBound = dv.indexUpperBound;
if (indexUpperBound + 1 - indexLowerBound + numDec >= VERTEX_BUFFER_MAX) {
const DeferredVerts &dv = drawVerts_[i];
if (dv.indexUpperBound + 1 - dv.indexLowerBound + numDecoded >= VERTEX_BUFFER_MAX) {
// Hit our limit! Stop decoding in this draw.
break;
}
// Decode the verts (and at the same time apply morphing/skinning). Simple.
DecodeAndTransformForDepthRaster(depthTransformed_ + numDec * 4, worldviewproj, dv.verts, indexLowerBound, indexUpperBound, dec, vertTypeID);
numDec += indexUpperBound - indexLowerBound + 1;
DecodeAndTransformForDepthRaster(depthTransformed_ + (draw.vertexOffset + numDecoded) * 4, worldviewproj, dv.verts, dv.indexLowerBound, dv.indexUpperBound, dec, vertTypeID);
numDecoded += dv.indexUpperBound - dv.indexLowerBound + 1;
}
// Copy indices.
memcpy(depthIndices_, decIndex_, sizeof(uint16_t) * vertexCount);
memcpy(depthIndices_ + draw.indexOffset, decIndex_, sizeof(uint16_t) * vertexCount);
// FUTURE SPLIT --- The above will always run on the main thread. The below can be split across workers.
ExecuteDepthDraw(draw);
@ -1053,18 +1047,18 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i
_dbg_assert_(prim != GE_PRIM_TRIANGLE_STRIP && prim != GE_PRIM_TRIANGLE_FAN);
if (dec->throughmode) {
ConvertPredecodedThroughForDepthRaster(depthTransformed_, decoded_, dec, numDecoded);
ConvertPredecodedThroughForDepthRaster(depthTransformed_ + 4 * draw.vertexOffset, decoded_, dec, numDecoded);
} else {
if (dec->VertexType() & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)) {
return;
}
float worldviewproj[16];
ComputeFinalProjMatrix().Store(worldviewproj);
TransformPredecodedForDepthRaster(depthTransformed_, worldviewproj, decoded_, dec, numDecoded);
TransformPredecodedForDepthRaster(depthTransformed_ + 4 * draw.vertexOffset, worldviewproj, decoded_, dec, numDecoded);
}
// Copy indices.
memcpy(depthIndices_, decIndex_, sizeof(uint16_t) * vertexCount);
memcpy(depthIndices_ + draw.indexOffset, decIndex_, sizeof(uint16_t) * vertexCount);
// FUTURE SPLIT --- The above will always run on the main thread. The below can be split across workers.
ExecuteDepthDraw(draw);
@ -1078,10 +1072,10 @@ void DrawEngineCommon::ExecuteDepthDraw(const DepthDraw &draw) {
int outVertCount = 0;
switch (draw.prim) {
case GE_PRIM_RECTANGLES:
outVertCount = DepthRasterClipIndexedRectangles(tx, ty, tz, depthTransformed_, depthIndices_ + draw.indexOffset, draw);
outVertCount = DepthRasterClipIndexedRectangles(tx, ty, tz, depthTransformed_ + 4 * draw.vertexOffset, depthIndices_ + draw.indexOffset, draw);
break;
case GE_PRIM_TRIANGLES:
outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, depthTransformed_, depthIndices_ + draw.indexOffset, draw);
outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, depthTransformed_ + 4 * draw.vertexOffset, depthIndices_ + draw.indexOffset, draw);
break;
default:
_dbg_assert_(false);