From 18f2a45a6a1b218579843865bdbb5ef207901ea9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 15 Jan 2022 22:19:44 -0800 Subject: [PATCH] softgpu: Allow binning across prim calls. --- GPU/Software/BinManager.cpp | 2 ++ GPU/Software/SoftGpu.cpp | 24 ++++++++++++++++ GPU/Software/SoftGpu.h | 1 + GPU/Software/TransformUnit.cpp | 50 ++++++++++++++++++---------------- GPU/Software/TransformUnit.h | 8 +++++- 5 files changed, 60 insertions(+), 25 deletions(-) diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index 19339cdd77..36981c6e8f 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -124,6 +124,8 @@ BinManager::BinManager() { queueRange_.y2 = 0; waitable_ = new BinWaitable(); + for (auto &s : taskStatus_) + s = false; } BinManager::~BinManager() { diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 76a631054d..d0e3fd378e 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -554,10 +554,14 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { break; case GE_CMD_FRAMEBUFPTR: + // We assume fb.data won't change while we're drawing. + drawEngine_->transformUnit.Flush(); fb.data = Memory::GetPointer(gstate.getFrameBufAddress()); break; case GE_CMD_FRAMEBUFWIDTH: + // We assume fb.data won't change while we're drawing. + drawEngine_->transformUnit.Flush(); fb.data = Memory::GetPointer(gstate.getFrameBufAddress()); break; @@ -572,6 +576,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: + // TODO: Try not flushing here, unless overlap with framebuf/depthbuf? + drawEngine_->transformUnit.Flush(); break; case GE_CMD_TEXBUFWIDTH0: @@ -582,6 +588,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: + // TODO: Try not flushing here, unless overlap with framebuf/depthbuf? + drawEngine_->transformUnit.Flush(); break; case GE_CMD_CLUTADDR: @@ -590,6 +598,10 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_LOADCLUT: { + // Might be copying drawing into the CLUT, so flush. + // TODO: It seems worth copying the CLUT to state... + drawEngine_->transformUnit.Flush(); + u32 clutAddr = gstate.getClutAddress(); u32 clutTotalBytes = gstate.getClutLoadBytes(); @@ -620,6 +632,9 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TRANSFERSTART: { + // Let's finish any drawing before we transfer. + drawEngine_->transformUnit.Flush(); + u32 srcBasePtr = gstate.getTransferSrcAddress(); u32 srcStride = gstate.getTransferSrcStride(); @@ -670,10 +685,14 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { break; case GE_CMD_ZBUFPTR: + // We assume depthbuf.data won't change while we're drawing. + drawEngine_->transformUnit.Flush(); depthbuf.data = Memory::GetPointer(gstate.getDepthBufAddress()); break; case GE_CMD_ZBUFWIDTH: + // We assume depthbuf.data won't change while we're drawing. + drawEngine_->transformUnit.Flush(); depthbuf.data = Memory::GetPointer(gstate.getDepthBufAddress()); break; @@ -867,6 +886,11 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { } } +void SoftGPU::FinishDeferred() { + // Need to flush before going back to CPU, so drawing is appropriately visible. + drawEngine_->transformUnit.Flush(); +} + void SoftGPU::GetStats(char *buffer, size_t bufsize) { snprintf(buffer, bufsize, "SoftGPU: (N/A)"); } diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index d1d22d73e1..2c7d89849f 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -65,6 +65,7 @@ public: void CheckGPUFeatures() override {} void InitClear() override {} void ExecuteOp(u32 op, u32 diff) override; + void FinishDeferred() override; void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override; void CopyDisplayToOutput(bool reallyDirty) override; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 8ab3c1f789..47d8393c40 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -41,10 +41,12 @@ TransformUnit::TransformUnit() { decoded_ = (u8 *)AllocateMemoryPages(TRANSFORM_BUF_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); + binner_ = new BinManager(); } TransformUnit::~TransformUnit() { FreeMemoryPages(decoded_, DECODED_VERTEX_BUFFER_SIZE); + delete binner_; } SoftwareDrawEngine::SoftwareDrawEngine() { @@ -333,8 +335,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy // TODO: Do this in two passes - first process the vertices (before indexing/stripping), // then resolve the indices. This lets us avoid transforming shared vertices twice. - static BinManager binner; - binner.UpdateState(); + binner_->UpdateState(); bool outside_range_flag = false; switch (prim_type) { @@ -367,22 +368,22 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy case GE_PRIM_TRIANGLES: { if (!gstate.isCullEnabled() || gstate.isModeClear()) { - Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], binner); - Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], binner); + Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], *binner_); + Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], *binner_); } else if (!gstate.getCullMode()) { - Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], binner); + Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], *binner_); } else { - Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], binner); + Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], *binner_); } break; } case GE_PRIM_LINES: - Clipper::ProcessLine(data[0], data[1], binner); + Clipper::ProcessLine(data[0], data[1], *binner_); break; case GE_PRIM_POINTS: - Clipper::ProcessPoint(data[0], binner); + Clipper::ProcessPoint(data[0], *binner_); break; default: @@ -422,14 +423,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy } if (data_index == 4) { - Clipper::ProcessRect(data[0], data[1], binner); - Clipper::ProcessRect(data[2], data[3], binner); + Clipper::ProcessRect(data[0], data[1], *binner_); + Clipper::ProcessRect(data[2], data[3], *binner_); data_index = 0; } } if (data_index >= 2) { - Clipper::ProcessRect(data[0], data[1], binner); + Clipper::ProcessRect(data[0], data[1], *binner_); data_index -= 2; } break; @@ -458,7 +459,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy --skip_count; } else { // We already incremented data_index, so data_index & 1 is previous one. - Clipper::ProcessLine(data[data_index & 1], data[(data_index & 1) ^ 1], binner); + Clipper::ProcessLine(data[data_index & 1], data[(data_index & 1) ^ 1], *binner_); } } break; @@ -484,7 +485,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy // If a strip is effectively a rectangle, draw it as such! if (!outside_range_flag && Rasterizer::DetectRectangleFromThroughModeStrip(data)) { - Clipper::ProcessRect(data[0], data[3], binner); + Clipper::ProcessRect(data[0], data[3], *binner_); break; } } @@ -512,14 +513,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy } if (!gstate.isCullEnabled() || gstate.isModeClear()) { - Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner); - Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner); + Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_); + Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_); } else if ((!gstate.getCullMode()) ^ ((data_index - 1) % 2)) { // We need to reverse the vertex order for each second primitive, // but we additionally need to do that for every primitive if CCW cullmode is used. - Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner); + Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_); } else { - Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner); + Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_); } } break; @@ -560,7 +561,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy int tl = -1, br = -1; if (!outside_range_flag && Rasterizer::DetectRectangleFromThroughModeFan(data, vertex_count, &tl, &br)) { - Clipper::ProcessRect(data[tl], data[br], binner); + Clipper::ProcessRect(data[tl], data[br], *binner_); break; } } @@ -588,14 +589,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy } if (!gstate.isCullEnabled() || gstate.isModeClear()) { - Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner); - Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner); + Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_); + Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_); } else if ((!gstate.getCullMode()) ^ ((data_index - 1) % 2)) { // We need to reverse the vertex order for each second primitive, // but we additionally need to do that for every primitive if CCW cullmode is used. - Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner); + Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_); } else { - Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner); + Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_); } } break; @@ -605,9 +606,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy ERROR_LOG(G3D, "Unexpected prim type: %d", prim_type); break; } +} - binner.Flush(); - +void TransformUnit::Flush() { + binner_->Flush(); GPUDebug::NotifyDraw(); } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 021dc538df..adb75c558e 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -32,6 +32,7 @@ typedef Vec3 ViewCoords; typedef Vec4 ClipCoords; // Range: -w <= x/y/z <= w struct SplinePatch; +class BinManager; struct ScreenCoords { @@ -117,9 +118,14 @@ public: void SubmitPrimitive(void* vertices, void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine); bool GetCurrentSimpleVertices(int count, std::vector &vertices, std::vector &indices); + + void Flush(); + +private: VertexData ReadVertex(VertexReader &vreader, bool &outside_range_flag); - u8 *decoded_; + u8 *decoded_ = nullptr; + BinManager *binner_ = nullptr; }; class SoftwareDrawEngine : public DrawEngineCommon {