diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 4ea75b7d16..1f6a44eda2 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -668,3 +668,69 @@ ReliableHashType DrawEngineCommon::ComputeHash() { fullhash += DoReliableHash(&uvScale[0], sizeof(uvScale[0]) * numDrawCalls, 0x0123e658); return fullhash; } + +void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { + if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { + DispatchFlush(); + } + + // TODO: Is this the right thing to do? + if (prim == GE_PRIM_KEEP_PREVIOUS) { + prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS; + } else { + prevPrim_ = prim; + } + + SetupVertexDecoder(vertType); + + *bytesRead = vertexCount * dec_->VertexSize(); + if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES)) + return; + + DeferredDrawCall &dc = drawCalls[numDrawCalls]; + dc.verts = verts; + dc.inds = inds; + dc.vertType = vertType; + dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; + dc.prim = prim; + dc.vertexCount = vertexCount; + + if (g_Config.bVertexCache) { + u32 dhash = dcid_; + dhash ^= (u32)(uintptr_t)verts; + dhash = __rotl(dhash, 13); + dhash ^= (u32)(uintptr_t)inds; + dhash = __rotl(dhash, 13); + dhash ^= (u32)vertType; + dhash = __rotl(dhash, 13); + dhash ^= (u32)vertexCount; + dhash = __rotl(dhash, 13); + dhash ^= (u32)prim; + dcid_ = dhash; + } + + if (inds) { + GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); + } else { + dc.indexLowerBound = 0; + dc.indexUpperBound = vertexCount - 1; + } + + uvScale[numDrawCalls] = gstate_c.uv; + + numDrawCalls++; + vertexCountInDrawCalls_ += vertexCount; + + if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { + DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); + decodeCounter_++; + } + + if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { + // Rendertarget == texture? + if (!g_Config.bDisableSlowFramebufEffects) { + gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); + DispatchFlush(); + } + } +} diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 24599a094b..b99bc12cea 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -56,10 +56,16 @@ public: // Flush is normally non-virtual but here's a virtual way to call it, used by the shared spline code, which is expensive anyway. // Not really sure if these wrappers are worth it... virtual void DispatchFlush() = 0; - // Same for SubmitPrim - virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) = 0; + + // This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim + // is different. Should probably refactor that. + virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { + SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead); + } bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead); + + void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead); void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead); @@ -73,6 +79,9 @@ public: bool IsCodePtrVertexDecoder(const u8 *ptr) const { return decJitCache_->IsInSpace(ptr); } + int GetNumDrawCalls() const { + return numDrawCalls; + } protected: virtual void ClearTrackedVertexArrays() {} diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 9924d9c888..2b49ae2d6e 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -257,72 +257,6 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade } } -void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) - Flush(); - - // TODO: Is this the right thing to do? - if (prim == GE_PRIM_KEEP_PREVIOUS) { - prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS; - } else { - prevPrim_ = prim; - } - - SetupVertexDecoder(vertType); - - *bytesRead = vertexCount * dec_->VertexSize(); - - if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES)) - return; - - DeferredDrawCall &dc = drawCalls[numDrawCalls]; - dc.verts = verts; - dc.inds = inds; - dc.vertType = vertType; - dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; - dc.prim = prim; - dc.vertexCount = vertexCount; - - if (g_Config.bVertexCache) { - u32 dhash = dcid_; - dhash ^= (u32)(uintptr_t)verts; - dhash = __rotl(dhash, 13); - dhash ^= (u32)(uintptr_t)inds; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertType; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertexCount; - dhash = __rotl(dhash, 13); - dhash ^= (u32)prim; - dcid_ = dhash; - } - - if (inds) { - GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); - } else { - dc.indexLowerBound = 0; - dc.indexUpperBound = vertexCount - 1; - } - - uvScale[numDrawCalls] = gstate_c.uv; - - numDrawCalls++; - vertexCountInDrawCalls_ += vertexCount; - - if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { - DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); - decodeCounter_++; - } - - if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { - // Rendertarget == texture? - if (!g_Config.bDisableSlowFramebufEffects) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - Flush(); - } - } -} - void DrawEngineD3D11::MarkUnreliable(VertexArrayInfoD3D11 *vai) { vai->status = VertexArrayInfoD3D11::VAI_UNRELIABLE; if (vai->vbo) { diff --git a/GPU/D3D11/DrawEngineD3D11.h b/GPU/D3D11/DrawEngineD3D11.h index 6928bca67b..9d4589baae 100644 --- a/GPU/D3D11/DrawEngineD3D11.h +++ b/GPU/D3D11/DrawEngineD3D11.h @@ -105,8 +105,6 @@ public: DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context); virtual ~DrawEngineD3D11(); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); - void SetShaderManager(ShaderManagerD3D11 *shaderManager) { shaderManager_ = shaderManager; } @@ -135,9 +133,6 @@ public: } void DispatchFlush() override { Flush(); } - void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override { - SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead); - } void ClearTrackedVertexArrays() override; diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index 24dcfdd734..ec866cc815 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -66,31 +66,6 @@ #include "Core/HLE/sceKernelInterrupt.h" #include "Core/HLE/sceGe.h" -struct D3D11CommandTableEntry { - uint8_t cmd; - uint8_t flags; - uint64_t dirty; - GPU_D3D11::CmdFunc func; -}; - -// This table gets crunched into a faster form by init. -static const D3D11CommandTableEntry commandTable[] = { - // Changes that dirty the current texture. - { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 }, - - // Changing the vertex type requires us to flush. - { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, - - { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_D3D11::Execute_Prim }, - { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, - { GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, - - // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... - { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_D3D11::Execute_LoadClut }, -}; - -GPU_D3D11::CommandInfo GPU_D3D11::cmdInfo_[256]{}; - GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), drawEngine_(draw, (ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE), @@ -126,45 +101,6 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } - memset(cmdInfo_, 0, sizeof(cmdInfo_)); - - // Import both the global and local command tables, and check for dupes - std::set dupeCheck; - for (size_t i = 0; i < commonCommandTableSize; i++) { - const u8 cmd = commonCommandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8); - cmdInfo_[cmd].func = commonCommandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - - for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) { - const u8 cmd = commandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8); - cmdInfo_[cmd].func = commandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - - // Find commands missing from the table. - for (int i = 0; i < 0xEF; i++) { - if (dupeCheck.find((u8)i) == dupeCheck.end()) { - ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i); - } - } - // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. UpdateCmdInfo(); @@ -187,18 +123,6 @@ GPU_D3D11::~GPU_D3D11() { stockD3D11.Destroy(); } -void GPU_D3D11::UpdateCmdInfo() { - if (g_Config.bSoftwareSkinning) { - cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning; - } else { - cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType; - } - - CheckGPUFeatures(); -} - void GPU_D3D11::CheckGPUFeatures() { u32 features = 0; @@ -330,26 +254,6 @@ void GPU_D3D11::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat f framebufferManagerD3D11_->SetDisplayFramebuffer(framebuf, stride, format); } -bool GPU_D3D11::FramebufferDirty() { - VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->dirtyAfterDisplay; - vfb->dirtyAfterDisplay = false; - return dirty; - } - return true; -} - -bool GPU_D3D11::FramebufferReallyDirty() { - VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->reallyDirtyAfterDisplay; - vfb->reallyDirtyAfterDisplay = false; - return dirty; - } - return true; -} - void GPU_D3D11::CopyDisplayToOutput() { float blendColor[4]{}; context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], blendColor, 0xFFFFFFFF); @@ -365,44 +269,6 @@ void GPU_D3D11::CopyDisplayToOutput() { gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); } -// Maybe should write this in ASM... -void GPU_D3D11::FastRunLoop(DisplayList &list) { - PROFILE_THIS_SCOPE("gpuloop"); - const CommandInfo *cmdInfo = cmdInfo_; - int dc = downcount; - for (; dc > 0; --dc) { - // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer - const u32 op = *(const u32 *)(Memory::base + list.pc); - const u32 cmd = op >> 24; - const CommandInfo &info = cmdInfo[cmd]; - const u32 diff = op ^ gstate.cmdmem[cmd]; - if (diff == 0) { - if (info.flags & FLAG_EXECUTE) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } - } else { - uint64_t flags = info.flags; - if (flags & FLAG_FLUSHBEFOREONCHANGE) { - drawEngine_.Flush(); - } - gstate.cmdmem[cmd] = op; - if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } else { - uint64_t dirty = flags >> 8; - if (dirty) - gstate_c.Dirty(dirty); - } - } - list.pc += 4; - } - downcount = 0; -} - void GPU_D3D11::FinishDeferred() { // This finishes reading any vertex data that is pending. drawEngine_.FinishDeferred(); @@ -435,85 +301,6 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) { } } -void GPU_D3D11::Execute_Prim(u32 op, u32 diff) { - // This drives all drawing. All other state we just buffer up, then we apply it only - // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. - - u32 data = op & 0xFFFFFF; - u32 count = data & 0xFFFF; - if (count == 0) - return; - - // Upper bits are ignored. - GEPrimitiveType prim = static_cast((data >> 16) & 7); - SetDrawType(DRAW_PRIM, prim); - - // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. - - if (gstate.isAntiAliasEnabled()) { - // Discard AA lines in DOA - if (prim == GE_PRIM_LINE_STRIP) - return; - // Discard AA lines in Summon Night 5 - if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled()) - return; - } - - // This also make skipping drawing very effective. - framebufferManagerD3D11_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - drawEngine_.SetupVertexDecoder(gstate.vertType); - // Rough estimate, not sure what's correct. - cyclesExecuted += EstimatePerVertexCost() * count; - return; - } - - u32 vertexAddr = gstate_c.vertexAddr; - if (!Memory::IsValidAddress(vertexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr); - return; - } - - void *verts = Memory::GetPointerUnchecked(vertexAddr); - void *inds = 0; - u32 vertexType = gstate.vertType; - if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - u32 indexAddr = gstate_c.indexAddr; - if (!Memory::IsValidAddress(indexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr); - return; - } - inds = Memory::GetPointerUnchecked(indexAddr); - } - -#ifndef MOBILE_DEVICE - if (prim > GE_PRIM_RECTANGLES) { - ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim); - } -#endif - - if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { - vertexCost_ = EstimatePerVertexCost(); - } - gpuStats.vertexGPUCycles += vertexCost_ * count; - cyclesExecuted += vertexCost_* count; - - int bytesRead = 0; - UpdateUVScaleOffset(); - drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead); - - // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). - // Some games rely on this, they don't bother reloading VADDR and IADDR. - // The VADDR/IADDR registers are NOT updated. - AdvanceVerts(vertexType, count, bytesRead); -} - -void GPU_D3D11::Execute_LoadClut(u32 op, u32 diff) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - textureCacheD3D11_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); - // This could be used to "dirty" textures with clut. -} - void GPU_D3D11::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h index f2dea31c61..e716906f83 100644 --- a/GPU/D3D11/GPU_D3D11.h +++ b/GPU/D3D11/GPU_D3D11.h @@ -36,7 +36,7 @@ public: GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw); ~GPU_D3D11(); - void CheckGPUFeatures(); + void CheckGPUFeatures() override; void PreExecuteOp(u32 op, u32 diff) override; void ExecuteOp(u32 op, u32 diff) override; @@ -50,23 +50,12 @@ public: void DoState(PointerWrap &p) override; void ClearShaderCache() override; - bool FramebufferDirty() override; - bool FramebufferReallyDirty() override; void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override { primaryInfo = reportingPrimaryInfo_; fullInfo = reportingFullInfo_; } - typedef void (GPU_D3D11::*CmdFunc)(u32 op, u32 diff); - struct CommandInfo { - uint64_t flags; - GPU_D3D11::CmdFunc func; - }; - - void Execute_Prim(u32 op, u32 diff); - void Execute_LoadClut(u32 op, u32 diff); - // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. std::vector DebugGetShaderIDs(DebugShaderType shader) override; std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; @@ -75,12 +64,9 @@ public: void EndHostFrame() override; protected: - void FastRunLoop(DisplayList &list) override; void FinishDeferred() override; private: - void UpdateCmdInfo(); - void Flush() { drawEngine_.Flush(); } @@ -101,8 +87,6 @@ private: DrawEngineD3D11 drawEngine_; ShaderManagerD3D11 *shaderManagerD3D11_; - static CommandInfo cmdInfo_[256]; - int lastVsync_; int vertexCost_ = 0; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 1acd41d7a3..c72d256727 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -232,70 +232,6 @@ IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader } } -void DrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) - Flush(); - - // TODO: Is this the right thing to do? - if (prim == GE_PRIM_KEEP_PREVIOUS) { - prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS; - } else { - prevPrim_ = prim; - } - - SetupVertexDecoder(vertType); - - *bytesRead = vertexCount * dec_->VertexSize(); - - if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES)) - return; - - DeferredDrawCall &dc = drawCalls[numDrawCalls]; - dc.verts = verts; - dc.inds = inds; - dc.vertType = vertType; - dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; - dc.prim = prim; - dc.vertexCount = vertexCount; - - u32 dhash = dcid_; - dhash ^= (u32)(uintptr_t)verts; - dhash = __rotl(dhash, 13); - dhash ^= (u32)(uintptr_t)inds; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertType; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertexCount; - dhash = __rotl(dhash, 13); - dhash ^= (u32)prim; - dcid_ = dhash; - - if (inds) { - GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); - } else { - dc.indexLowerBound = 0; - dc.indexUpperBound = vertexCount - 1; - } - - uvScale[numDrawCalls] = gstate_c.uv; - - numDrawCalls++; - vertexCountInDrawCalls_ += vertexCount; - - if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { - DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); - decodeCounter_++; - } - - if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { - // Rendertarget == texture? - if (!g_Config.bDisableSlowFramebufEffects) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - Flush(); - } - } -} - void DrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) { vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE; if (vai->vbo) { diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h index 35cc5fd18e..ef015b02b5 100644 --- a/GPU/Directx9/DrawEngineDX9.h +++ b/GPU/Directx9/DrawEngineDX9.h @@ -103,8 +103,6 @@ public: DrawEngineDX9(Draw::DrawContext *draw); virtual ~DrawEngineDX9(); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); - void SetShaderManager(ShaderManagerDX9 *shaderManager) { shaderManager_ = shaderManager; } @@ -134,9 +132,6 @@ public: } void DispatchFlush() override { Flush(); } - void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override { - SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead); - } private: void DoFlush(); diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 158442285c..590c7ac135 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -50,31 +50,6 @@ namespace DX9 { -struct D3D9CommandTableEntry { - uint8_t cmd; - uint8_t flags; - uint64_t dirty; - GPU_DX9::CmdFunc func; -}; - -// This table gets crunched into a faster form by init. -static const D3D9CommandTableEntry commandTable[] = { - // Changes that dirty the current texture. - { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 }, - - // Changing the vertex type requires us to flush. - { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, - - { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_DX9::Execute_Prim }, - { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, - { GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, - - // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... - { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_DX9::Execute_LoadClut }, -}; - -GPU_DX9::CommandInfo GPU_DX9::cmdInfo_[256]; - GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), depalShaderCache_(draw), @@ -108,44 +83,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } - memset(cmdInfo_, 0, sizeof(cmdInfo_)); - - // Import both the global and local command tables, and check for dupes - std::set dupeCheck; - for (size_t i = 0; i < commonCommandTableSize; i++) { - const u8 cmd = commonCommandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8); - cmdInfo_[cmd].func = commonCommandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - - for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) { - const u8 cmd = commandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8); - cmdInfo_[cmd].func = commandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - // Find commands missing from the table. - for (int i = 0; i < 0xEF; i++) { - if (dupeCheck.find((u8)i) == dupeCheck.end()) { - ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i); - } - } - // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. UpdateCmdInfo(); @@ -166,18 +103,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw) } } -void GPU_DX9::UpdateCmdInfo() { - if (g_Config.bSoftwareSkinning) { - cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning; - } else { - cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType; - } - - CheckGPUFeatures(); -} - void GPU_DX9::CheckGPUFeatures() { u32 features = 0; @@ -305,26 +230,6 @@ void GPU_DX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for framebufferManagerDX9_->SetDisplayFramebuffer(framebuf, stride, format); } -bool GPU_DX9::FramebufferDirty() { - VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->dirtyAfterDisplay; - vfb->dirtyAfterDisplay = false; - return dirty; - } - return true; -} - -bool GPU_DX9::FramebufferReallyDirty() { - VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->reallyDirtyAfterDisplay; - vfb->reallyDirtyAfterDisplay = false; - return dirty; - } - return true; -} - void GPU_DX9::CopyDisplayToOutput() { dxstate.depthWrite.set(true); dxstate.colorMask.set(true, true, true, true); @@ -340,44 +245,6 @@ void GPU_DX9::CopyDisplayToOutput() { gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); } -// Maybe should write this in ASM... -void GPU_DX9::FastRunLoop(DisplayList &list) { - PROFILE_THIS_SCOPE("gpuloop"); - const CommandInfo *cmdInfo = cmdInfo_; - int dc = downcount; - for (; dc > 0; --dc) { - // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer - const u32 op = *(const u32 *)(Memory::base + list.pc); - const u32 cmd = op >> 24; - const CommandInfo &info = cmdInfo[cmd]; - const u32 diff = op ^ gstate.cmdmem[cmd]; - if (diff == 0) { - if (info.flags & FLAG_EXECUTE) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } - } else { - uint64_t flags = info.flags; - if (flags & FLAG_FLUSHBEFOREONCHANGE) { - drawEngine_.Flush(); - } - gstate.cmdmem[cmd] = op; - if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } else { - uint64_t dirty = flags >> 8; - if (dirty) - gstate_c.Dirty(dirty); - } - } - list.pc += 4; - } - downcount = 0; -} - void GPU_DX9::FinishDeferred() { // This finishes reading any vertex data that is pending. drawEngine_.FinishDeferred(); @@ -410,84 +277,6 @@ void GPU_DX9::ExecuteOp(u32 op, u32 diff) { } } -void GPU_DX9::Execute_Prim(u32 op, u32 diff) { - // This drives all drawing. All other state we just buffer up, then we apply it only - // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. - - u32 data = op & 0xFFFFFF; - u32 count = data & 0xFFFF; - if (count == 0) - return; - // Upper bits are ignored. - GEPrimitiveType prim = static_cast((data >> 16) & 7); - SetDrawType(DRAW_PRIM, prim); - - // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. - - if (gstate.isAntiAliasEnabled()) { - // Discard AA lines in DOA - if (prim == GE_PRIM_LINE_STRIP) - return; - // Discard AA lines in Summon Night 5 - if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled()) - return; - } - - // This also make skipping drawing very effective. - framebufferManagerDX9_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - drawEngine_.SetupVertexDecoder(gstate.vertType); - // Rough estimate, not sure what's correct. - cyclesExecuted += EstimatePerVertexCost() * count; - return; - } - - u32 vertexAddr = gstate_c.vertexAddr; - if (!Memory::IsValidAddress(vertexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr); - return; - } - - void *verts = Memory::GetPointerUnchecked(vertexAddr); - void *inds = 0; - u32 vertexType = gstate.vertType; - if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - u32 indexAddr = gstate_c.indexAddr; - if (!Memory::IsValidAddress(indexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr); - return; - } - inds = Memory::GetPointerUnchecked(indexAddr); - } - -#ifndef MOBILE_DEVICE - if (prim > GE_PRIM_RECTANGLES) { - ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim); - } -#endif - - if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { - vertexCost_ = EstimatePerVertexCost(); - } - gpuStats.vertexGPUCycles += vertexCost_ * count; - cyclesExecuted += vertexCost_* count; - - int bytesRead = 0; - UpdateUVScaleOffset(); - drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead); - - // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). - // Some games rely on this, they don't bother reloading VADDR and IADDR. - // The VADDR/IADDR registers are NOT updated. - AdvanceVerts(vertexType, count, bytesRead); -} - -void GPU_DX9::Execute_LoadClut(u32 op, u32 diff) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - textureCacheDX9_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); - // This could be used to "dirty" textures with clut. -} - void GPU_DX9::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index 1a0a7ee037..d72e2c4321 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -37,7 +37,7 @@ public: GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw); ~GPU_DX9(); - void CheckGPUFeatures(); + void CheckGPUFeatures() override; void PreExecuteOp(u32 op, u32 diff) override; void ExecuteOp(u32 op, u32 diff) override; @@ -51,24 +51,12 @@ public: void DoState(PointerWrap &p) override; void ClearShaderCache() override; - bool FramebufferDirty() override; - bool FramebufferReallyDirty() override; void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override { primaryInfo = reportingPrimaryInfo_; fullInfo = reportingFullInfo_; } - typedef void (GPU_DX9::*CmdFunc)(u32 op, u32 diff); - struct CommandInfo { - uint64_t flags; - GPU_DX9::CmdFunc func; - }; - - void Execute_Prim(u32 op, u32 diff); - void Execute_TexSize0(u32 op, u32 diff); - void Execute_LoadClut(u32 op, u32 diff); - // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. std::vector DebugGetShaderIDs(DebugShaderType shader) override; std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; @@ -76,12 +64,9 @@ public: void BeginHostFrame() override; protected: - void FastRunLoop(DisplayList &list) override; void FinishDeferred() override; private: - void UpdateCmdInfo(); - void Flush() { drawEngine_.Flush(); } @@ -102,8 +87,6 @@ private: DrawEngineDX9 drawEngine_; ShaderManagerDX9 *shaderManagerDX9_; - static CommandInfo cmdInfo_[256]; - int lastVsync_; int vertexCost_ = 0; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index bb0676719c..83b56e5cab 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -276,70 +276,6 @@ GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const return inputLayout; } -void DrawEngineGLES::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) - Flush(); - - // TODO: Is this the right thing to do? - if (prim == GE_PRIM_KEEP_PREVIOUS) { - prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS; - } else { - prevPrim_ = prim; - } - - SetupVertexDecoder(vertType); - - *bytesRead = vertexCount * dec_->VertexSize(); - - if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES)) - return; - - DeferredDrawCall &dc = drawCalls[numDrawCalls]; - dc.verts = verts; - dc.inds = inds; - dc.vertType = vertType; - dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; - dc.prim = prim; - dc.vertexCount = vertexCount; - - u32 dhash = dcid_; - dhash ^= (u32)(uintptr_t)verts; - dhash = __rotl(dhash, 13); - dhash ^= (u32)(uintptr_t)inds; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertType; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertexCount; - dhash = __rotl(dhash, 13); - dhash ^= (u32)prim; - dcid_ = dhash; - - if (inds) { - GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); - } else { - dc.indexLowerBound = 0; - dc.indexUpperBound = vertexCount - 1; - } - - uvScale[numDrawCalls] = gstate_c.uv; - - numDrawCalls++; - vertexCountInDrawCalls_ += vertexCount; - - if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { - DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); - decodeCounter_++; - } - - if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { - // Rendertarget == texture? - if (!g_Config.bDisableSlowFramebufEffects) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - Flush(); - } - } -} - void DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf) { u8 *dest = decoded; diff --git a/GPU/GLES/DrawEngineGLES.h b/GPU/GLES/DrawEngineGLES.h index 59246fb5cd..42e4ce5133 100644 --- a/GPU/GLES/DrawEngineGLES.h +++ b/GPU/GLES/DrawEngineGLES.h @@ -105,8 +105,6 @@ public: DrawEngineGLES(Draw::DrawContext *draw); virtual ~DrawEngineGLES(); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); - void SetShaderManager(ShaderManagerGLES *shaderManager) { shaderManager_ = shaderManager; } @@ -146,9 +144,6 @@ public: bool IsCodePtrVertexDecoder(const u8 *ptr) const; void DispatchFlush() override { Flush(); } - void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override { - SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead); - } GLPushBuffer *GetPushVertexBuffer() { return frameData_[render_->GetCurFrame()].pushVertex; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 2fb3043cf8..477f507344 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -51,32 +51,6 @@ #include "Windows/GPU/WindowsGLContext.h" #endif -struct GLESCommandTableEntry { - uint8_t cmd; - uint8_t flags; - uint64_t dirty; - GPU_GLES::CmdFunc func; -}; - -// This table gets crunched into a faster form by init. -// TODO: Share this table between the backends. Will have to make another indirection for the function pointers though.. -static const GLESCommandTableEntry commandTable[] = { - // Changes that dirty the current texture. - { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 }, - - // Changing the vertex type requires us to flush. - { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, - - { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_GLES::Execute_Prim }, - { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, - { GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, - - // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... - { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_LoadClut }, -}; - -GPU_GLES::CommandInfo GPU_GLES::cmdInfo_[256]; - GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw), depalShaderCache_(draw) { UpdateVsyncInterval(true); @@ -112,44 +86,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } - memset(cmdInfo_, 0, sizeof(cmdInfo_)); - - // Import both the global and local command tables, and check for dupes - std::set dupeCheck; - for (size_t i = 0; i < commonCommandTableSize; i++) { - const u8 cmd = commonCommandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8); - cmdInfo_[cmd].func = commonCommandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - - for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) { - const u8 cmd = commandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8); - cmdInfo_[cmd].func = commandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - // Find commands missing from the table. - for (int i = 0; i < 0xEF; i++) { - if (dupeCheck.find((u8)i) == dupeCheck.end()) { - ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i); - } - } - // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. @@ -467,16 +403,6 @@ inline void GPU_GLES::UpdateVsyncInterval(bool force) { #endif } -void GPU_GLES::UpdateCmdInfo() { - if (g_Config.bSoftwareSkinning) { - cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning; - } else { - cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType; - } -} - void GPU_GLES::ReapplyGfxState() { GPUCommon::ReapplyGfxState(); } @@ -510,26 +436,6 @@ void GPU_GLES::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat fo framebufferManagerGL_->SetDisplayFramebuffer(framebuf, stride, format); } -bool GPU_GLES::FramebufferDirty() { - VirtualFramebuffer *vfb = framebufferManagerGL_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->dirtyAfterDisplay; - vfb->dirtyAfterDisplay = false; - return dirty; - } - return true; -} - -bool GPU_GLES::FramebufferReallyDirty() { - VirtualFramebuffer *vfb = framebufferManagerGL_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->reallyDirtyAfterDisplay; - vfb->reallyDirtyAfterDisplay = false; - return dirty; - } - return true; -} - void GPU_GLES::CopyDisplayToOutput() { // Flush anything left over. framebufferManagerGL_->RebindFramebuffer(); @@ -551,44 +457,6 @@ void GPU_GLES::CopyDisplayToOutput() { #endif } -// Maybe should write this in ASM... -void GPU_GLES::FastRunLoop(DisplayList &list) { - PROFILE_THIS_SCOPE("gpuloop"); - const CommandInfo *cmdInfo = cmdInfo_; - int dc = downcount; - for (; dc > 0; --dc) { - // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer - const u32 op = *(const u32 *)(Memory::base + list.pc); - const u32 cmd = op >> 24; - const CommandInfo &info = cmdInfo[cmd]; - const u32 diff = op ^ gstate.cmdmem[cmd]; - if (diff == 0) { - if (info.flags & FLAG_EXECUTE) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } - } else { - uint64_t flags = info.flags; - if (flags & FLAG_FLUSHBEFOREONCHANGE) { - drawEngine_.Flush(); - } - gstate.cmdmem[cmd] = op; - if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } else { - uint64_t dirty = flags >> 8; - if (dirty) - gstate_c.Dirty(dirty); - } - } - list.pc += 4; - } - downcount = 0; -} - void GPU_GLES::FinishDeferred() { // This finishes reading any vertex data that is pending. drawEngine_.FinishDeferred(); @@ -621,81 +489,6 @@ void GPU_GLES::ExecuteOp(u32 op, u32 diff) { } } -void GPU_GLES::Execute_Prim(u32 op, u32 diff) { - // This drives all drawing. All other state we just buffer up, then we apply it only - // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. - - u32 data = op & 0xFFFFFF; - u32 count = data & 0xFFFF; - if (count == 0) - return; - - // Upper bits are ignored. - GEPrimitiveType prim = static_cast((data >> 16) & 7); - SetDrawType(DRAW_PRIM, prim); - - // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. - - if (gstate.isAntiAliasEnabled()) { - // Discard AA lines in DOA - if (prim == GE_PRIM_LINE_STRIP) - return; - // Discard AA lines in Summon Night 5 - if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled()) - return; - } - - // This also makes skipping drawing very effective. This function can change the framebuffer. - framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - drawEngine_.SetupVertexDecoder(gstate.vertType); - // Rough estimate, not sure what's correct. - cyclesExecuted += EstimatePerVertexCost() * count; - return; - } - - if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); - return; - } - - void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); - void *inds = 0; - if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - if (!Memory::IsValidAddress(gstate_c.indexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr); - return; - } - inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); - } - -#ifndef MOBILE_DEVICE - if (prim > GE_PRIM_RECTANGLES) { - ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim); - } -#endif - - if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { - vertexCost_ = EstimatePerVertexCost(); - } - gpuStats.vertexGPUCycles += vertexCost_ * count; - cyclesExecuted += vertexCost_* count; - - int bytesRead = 0; - UpdateUVScaleOffset(); - drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead); - - // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). - // Some games rely on this, they don't bother reloading VADDR and IADDR. - // The VADDR/IADDR registers are NOT updated. - AdvanceVerts(gstate.vertType, count, bytesRead); -} - -void GPU_GLES::Execute_LoadClut(u32 op, u32 diff) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - textureCacheGL_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); -} - void GPU_GLES::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h index f23c5ddf9d..4ea7a62064 100644 --- a/GPU/GLES/GPU_GLES.h +++ b/GPU/GLES/GPU_GLES.h @@ -36,7 +36,7 @@ public: ~GPU_GLES(); // This gets called on startup and when we get back from settings. - void CheckGPUFeatures(); + void CheckGPUFeatures() override; bool IsReady() override; @@ -55,23 +55,12 @@ public: void ClearShaderCache() override; void CleanupBeforeUI() override; - bool FramebufferDirty() override; - bool FramebufferReallyDirty() override; void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override { primaryInfo = reportingPrimaryInfo_; fullInfo = reportingFullInfo_; } - typedef void (GPU_GLES::*CmdFunc)(u32 op, u32 diff); - struct CommandInfo { - uint64_t flags; - GPU_GLES::CmdFunc func; - }; - - void Execute_Prim(u32 op, u32 diff); - void Execute_LoadClut(u32 op, u32 diff); - // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. std::vector DebugGetShaderIDs(DebugShaderType shader) override; std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; @@ -80,7 +69,6 @@ public: void EndHostFrame() override; protected: - void FastRunLoop(DisplayList &list) override; void FinishDeferred() override; private: @@ -96,9 +84,6 @@ private: void Reinitialize() override; inline void UpdateVsyncInterval(bool force); - void UpdateCmdInfo(); - - static CommandInfo cmdInfo_[256]; FramebufferManagerGLES *framebufferManagerGL_; TextureCacheGLES *textureCacheGL_; diff --git a/GPU/GPU.cpp b/GPU/GPU.cpp index 847e25661f..abccdf6da9 100644 --- a/GPU/GPU.cpp +++ b/GPU/GPU.cpp @@ -110,6 +110,6 @@ bool GPU_Init(GraphicsContext *ctx, Draw::DrawContext *draw) { void GPU_Shutdown() { delete gpu; - gpu = 0; + gpu = nullptr; gpuDebug = 0; } diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index af99023ced..d30ba2cfdf 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -42,6 +42,15 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE { GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to? + { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommon::Execute_Prim }, + { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, + { GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, + + // Changing the vertex type requires us to flush. + { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, + + { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_LoadClut }, + // These two are actually processed in CMD_END. Not sure if FLAG_FLUSHBEFORE matters. { GE_CMD_SIGNAL, FLAG_FLUSHBEFORE }, { GE_CMD_FINISH, FLAG_FLUSHBEFORE }, @@ -121,7 +130,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXOFFSETU }, { GE_CMD_TEXOFFSETV }, - // TEXSIZE0 is handled by each backend. + { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 }, { GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, @@ -343,6 +352,9 @@ const CommonCommandTableEntry commonCommandTable[] = { }; size_t commonCommandTableSize = ARRAY_SIZE(commonCommandTable); +// TODO: Make class member? +GPUCommon::CommandInfo GPUCommon::cmdInfo_[256]; + void GPUCommon::Flush() { drawEngineCommon_->DispatchFlush(); } @@ -366,11 +378,47 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : gstate.Reset(); gstate_c.Reset(); gpuStats.Reset(); + + memset(cmdInfo_, 0, sizeof(cmdInfo_)); + + // Import both the global and local command tables, and check for dupes + std::set dupeCheck; + for (size_t i = 0; i < commonCommandTableSize; i++) { + const u8 cmd = commonCommandTable[i].cmd; + if (dupeCheck.find(cmd) != dupeCheck.end()) { + ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); + } else { + dupeCheck.insert(cmd); + } + cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8); + cmdInfo_[cmd].func = commonCommandTable[i].func; + if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { + Crash(); + } + } + // Find commands missing from the table. + for (int i = 0; i < 0xEF; i++) { + if (dupeCheck.find((u8)i) == dupeCheck.end()) { + ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i); + } + } + + UpdateCmdInfo(); } GPUCommon::~GPUCommon() { } +void GPUCommon::UpdateCmdInfo() { + if (g_Config.bSoftwareSkinning) { + cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; + cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning; + } else { + cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; + cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType; + } +} + void GPUCommon::BeginHostFrame() { ReapplyGfxState(); @@ -917,6 +965,46 @@ bool GPUCommon::InterpretList(DisplayList &list) { return gpuState == GPUSTATE_DONE || gpuState == GPUSTATE_ERROR; } +// Maybe should write this in ASM... +void GPUCommon::FastRunLoop(DisplayList &list) { + PROFILE_THIS_SCOPE("gpuloop"); + const CommandInfo *cmdInfo = cmdInfo_; + int dc = downcount; + for (; dc > 0; --dc) { + // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer + const u32 op = *(const u32 *)(Memory::base + list.pc); + const u32 cmd = op >> 24; + const CommandInfo &info = cmdInfo[cmd]; + const u32 diff = op ^ gstate.cmdmem[cmd]; + if (diff == 0) { + if (info.flags & FLAG_EXECUTE) { + downcount = dc; + (this->*info.func)(op, diff); + dc = downcount; + } + } else { + uint64_t flags = info.flags; + if (flags & FLAG_FLUSHBEFOREONCHANGE) { + if (drawEngineCommon_->GetNumDrawCalls()) { + drawEngineCommon_->DispatchFlush(); + } + } + gstate.cmdmem[cmd] = op; + if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) { + downcount = dc; + (this->*info.func)(op, diff); + dc = downcount; + } else { + uint64_t dirty = flags >> 8; + if (dirty) + gstate_c.Dirty(dirty); + } + } + list.pc += 4; + } + downcount = 0; +} + void GPUCommon::BeginFrame() { immCount_ = 0; if (dumpNextFrame_) { @@ -1347,6 +1435,11 @@ void GPUCommon::Execute_VertexType(u32 op, u32 diff) { } } +void GPUCommon::Execute_LoadClut(u32 op, u32 diff) { + gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); + textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); +} + void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) { // Don't flush when weight count changes, unless morph is enabled. if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) { @@ -1368,6 +1461,81 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) { gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE); } + +void GPUCommon::Execute_Prim(u32 op, u32 diff) { + // This drives all drawing. All other state we just buffer up, then we apply it only + // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. + + PROFILE_THIS_SCOPE("execprim"); + + u32 data = op & 0xFFFFFF; + u32 count = data & 0xFFFF; + if (count == 0) + return; + + // Upper bits are ignored. + GEPrimitiveType prim = static_cast((data >> 16) & 7); + SetDrawType(DRAW_PRIM, prim); + + // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. + if (gstate.isAntiAliasEnabled()) { + // Discard AA lines in DOA + if (prim == GE_PRIM_LINE_STRIP) + return; + // Discard AA lines in Summon Night 5 + if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled()) + return; + } + + // This also makes skipping drawing very effective. + framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + + if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { + drawEngineCommon_->SetupVertexDecoder(gstate.vertType); // Do we still need to do this? + // Rough estimate, not sure what's correct. + cyclesExecuted += EstimatePerVertexCost() * count; + return; + } + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + return; + } + + void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); + void *inds = 0; + u32 vertexType = gstate.vertType; + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + u32 indexAddr = gstate_c.indexAddr; + if (!Memory::IsValidAddress(indexAddr)) { + ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr); + return; + } + inds = Memory::GetPointerUnchecked(indexAddr); + } + +#ifndef MOBILE_DEVICE + if (prim > GE_PRIM_RECTANGLES) { + ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim); + } +#endif + + if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { + vertexCost_ = EstimatePerVertexCost(); + } + gpuStats.vertexGPUCycles += vertexCost_ * count; + cyclesExecuted += vertexCost_* count; + + int bytesRead = 0; + UpdateUVScaleOffset(); + drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead); + + // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). + // Some games rely on this, they don't bother reloading VADDR and IADDR. + // The VADDR/IADDR registers are NOT updated. + AdvanceVerts(vertexType, count, bytesRead); +} + void GPUCommon::Execute_Bezier(u32 op, u32 diff) { drawEngineCommon_->DispatchFlush(); @@ -2432,3 +2600,23 @@ bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) { } return false; } + +bool GPUCommon::FramebufferDirty() { + VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); + if (vfb) { + bool dirty = vfb->dirtyAfterDisplay; + vfb->dirtyAfterDisplay = false; + return dirty; + } + return true; +} + +bool GPUCommon::FramebufferReallyDirty() { + VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); + if (vfb) { + bool dirty = vfb->reallyDirtyAfterDisplay; + vfb->reallyDirtyAfterDisplay = false; + return dirty; + } + return true; +} diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 1890166ce8..e520b38e15 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -70,6 +70,10 @@ public: Draw::DrawContext *GetDrawContext() override { return draw_; } + virtual void CheckGPUFeatures() = 0; + + void UpdateCmdInfo(); + bool IsReady() override { return true; } @@ -129,11 +133,14 @@ public: void Execute_VertexType(u32 op, u32 diff); void Execute_VertexTypeSkinning(u32 op, u32 diff); + void Execute_Prim(u32 op, u32 diff); void Execute_Bezier(u32 op, u32 diff); void Execute_Spline(u32 op, u32 diff); void Execute_BoundingBox(u32 op, u32 diff); void Execute_BlockTransferStart(u32 op, u32 diff); + void Execute_LoadClut(u32 op, u32 diff); + void Execute_TexSize0(u32 op, u32 diff); void Execute_TexLevel(u32 op, u32 diff); @@ -238,6 +245,9 @@ public: return -1; } + bool FramebufferDirty() override; + bool FramebufferReallyDirty() override; + typedef void (GPUCommon::*CmdFunc)(u32 op, u32 diff); protected: @@ -258,8 +268,8 @@ protected: void BeginFrame() override; - // To avoid virtual calls to PreExecuteOp(). - virtual void FastRunLoop(DisplayList &list) = 0; + virtual void FastRunLoop(DisplayList &list); + void SlowRunLoop(DisplayList &list); void UpdatePC(u32 currentPC, u32 newPC); void UpdateState(GPURunState state); @@ -290,6 +300,13 @@ protected: GraphicsContext *gfxCtx_; Draw::DrawContext *draw_; + struct CommandInfo { + uint64_t flags; + GPUCommon::CmdFunc func; + }; + + static CommandInfo cmdInfo_[256]; + typedef std::list DisplayListQueue; int nextListID; @@ -316,6 +333,8 @@ protected: DrawType lastDraw_; GEPrimitiveType lastPrim_; + int vertexCost_ = 0; + // No idea how big this buffer needs to be. enum { MAX_IMMBUFFER_SIZE = 32, diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index 59b920723c..12deca5d41 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -26,6 +26,8 @@ class NullGPU : public GPUCommon { public: NullGPU(); ~NullGPU(); + + void CheckGPUFeatures() override {} void InitClear() override {} void ExecuteOp(u32 op, u32 diff) override; diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 7d5750b043..92762a487f 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -52,6 +52,8 @@ class SoftGPU : public GPUCommon { public: SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *_thin3D); ~SoftGPU(); + + void CheckGPUFeatures() override {} void InitClear() override {} void ExecuteOp(u32 op, u32 diff) override; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 122b50ddaa..faba817b1c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -337,72 +337,6 @@ void DrawEngineVulkan::EndFrame() { vertexCache_->End(); } -void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { - Flush(); - } - - // TODO: Is this the right thing to do? - if (prim == GE_PRIM_KEEP_PREVIOUS) { - prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS; - } else { - prevPrim_ = prim; - } - - SetupVertexDecoder(vertType); - - *bytesRead = vertexCount * dec_->VertexSize(); - if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES)) - return; - - DeferredDrawCall &dc = drawCalls[numDrawCalls]; - dc.verts = verts; - dc.inds = inds; - dc.vertType = vertType; - dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; - dc.prim = prim; - dc.vertexCount = vertexCount; - - if (g_Config.bVertexCache) { - u32 dhash = dcid_; - dhash ^= (u32)(uintptr_t)verts; - dhash = __rotl(dhash, 13); - dhash ^= (u32)(uintptr_t)inds; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertType; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertexCount; - dhash = __rotl(dhash, 13); - dhash ^= (u32)prim; - dcid_ = dhash; - } - - if (inds) { - GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); - } else { - dc.indexLowerBound = 0; - dc.indexUpperBound = vertexCount - 1; - } - - uvScale[numDrawCalls] = gstate_c.uv; - - numDrawCalls++; - vertexCountInDrawCalls_ += vertexCount; - - if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { - DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); - decodeCounter_++; - } - - if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { - // Rendertarget == texture? - if (!g_Config.bDisableSlowFramebufEffects) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - Flush(); - } - } -} - void DrawEngineVulkan::DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) { u8 *dest = decoded; diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 15890cefde..d39186837c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -122,8 +122,6 @@ public: DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *draw); virtual ~DrawEngineVulkan(); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); - void SetShaderManager(ShaderManagerVulkan *shaderManager) { shaderManager_ = shaderManager; } @@ -157,9 +155,6 @@ public: } void DispatchFlush() override { Flush(); } - void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override { - SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead); - } VkPipelineLayout GetPipelineLayout() const { return pipelineLayout_; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 5bdd2fc086..0aeefe8495 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -46,31 +46,6 @@ #include "Core/HLE/sceKernelInterrupt.h" #include "Core/HLE/sceGe.h" -struct VulkanCommandTableEntry { - uint8_t cmd; - uint8_t flags; - uint64_t dirty; - GPU_Vulkan::CmdFunc func; -}; - -GPU_Vulkan::CommandInfo GPU_Vulkan::cmdInfo_[256]; - -// This table gets crunched into a faster form by init. -static const VulkanCommandTableEntry commandTable[] = { - // Changes that dirty the current texture. - { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 }, - - // Changing the vertex type requires us to flush. - { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, - - { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Prim }, - { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, - { GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, - - // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... - { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_LoadClut }, -}; - GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), vulkan_((VulkanContext *)gfxCtx->GetAPIContext()), @@ -111,46 +86,6 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } - memset(cmdInfo_, 0, sizeof(cmdInfo_)); - - // Import both the global and local command tables, and check for dupes - std::set dupeCheck; - for (size_t i = 0; i < commonCommandTableSize; i++) { - const u8 cmd = commonCommandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8); - cmdInfo_[cmd].func = commonCommandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - - for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) { - const u8 cmd = commandTable[i].cmd; - if (dupeCheck.find(cmd) != dupeCheck.end()) { - ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd); - } else { - dupeCheck.insert(cmd); - } - cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8); - cmdInfo_[cmd].func = commandTable[i].func; - if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { - Crash(); - } - } - // Find commands missing from the table. - for (int i = 0; i < 0xEF; i++) { - if (dupeCheck.find((u8)i) == dupeCheck.end()) { - ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i); - } - } - - UpdateCmdInfo(); - BuildReportingInfo(); // Update again after init to be sure of any silly driver problems. UpdateVsyncInterval(true); @@ -405,41 +340,11 @@ void GPU_Vulkan::UpdateVsyncInterval(bool force) { // TODO } -void GPU_Vulkan::UpdateCmdInfo() { - if (g_Config.bSoftwareSkinning) { - cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning; - } else { - cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType; - } -} - void GPU_Vulkan::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) { host->GPUNotifyDisplay(framebuf, stride, format); framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format); } -bool GPU_Vulkan::FramebufferDirty() { - VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->dirtyAfterDisplay; - vfb->dirtyAfterDisplay = false; - return dirty; - } - return true; -} - -bool GPU_Vulkan::FramebufferReallyDirty() { - VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB(); - if (vfb) { - bool dirty = vfb->reallyDirtyAfterDisplay; - vfb->reallyDirtyAfterDisplay = false; - return dirty; - } - return true; -} - void GPU_Vulkan::CopyDisplayToOutput() { // Flush anything left over. drawEngine_.Flush(); @@ -451,44 +356,6 @@ void GPU_Vulkan::CopyDisplayToOutput() { gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); } -// Maybe should write this in ASM... -void GPU_Vulkan::FastRunLoop(DisplayList &list) { - PROFILE_THIS_SCOPE("gpuloop"); - const CommandInfo *cmdInfo = cmdInfo_; - int dc = downcount; - for (; dc > 0; --dc) { - // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer - const u32 op = *(const u32 *)(Memory::base + list.pc); - const u32 cmd = op >> 24; - const CommandInfo &info = cmdInfo[cmd]; - const u32 diff = op ^ gstate.cmdmem[cmd]; - if (diff == 0) { - if (info.flags & FLAG_EXECUTE) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } - } else { - uint64_t flags = info.flags; - if (flags & FLAG_FLUSHBEFOREONCHANGE) { - drawEngine_.Flush(); - } - gstate.cmdmem[cmd] = op; - if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) { - downcount = dc; - (this->*info.func)(op, diff); - dc = downcount; - } else { - uint64_t dirty = flags >> 8; - if (dirty) - gstate_c.Dirty(dirty); - } - } - list.pc += 4; - } - downcount = 0; -} - void GPU_Vulkan::FinishDeferred() { drawEngine_.FinishDeferred(); } @@ -520,85 +387,6 @@ void GPU_Vulkan::ExecuteOp(u32 op, u32 diff) { } } -void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) { - // This drives all drawing. All other state we just buffer up, then we apply it only - // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. - - PROFILE_THIS_SCOPE("execprim"); - - u32 data = op & 0xFFFFFF; - u32 count = data & 0xFFFF; - if (count == 0) - return; - - // Upper bits are ignored. - GEPrimitiveType prim = static_cast((data >> 16) & 7); - SetDrawType(DRAW_PRIM, prim); - - // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. - if (gstate.isAntiAliasEnabled()) { - // Discard AA lines in DOA - if (prim == GE_PRIM_LINE_STRIP) - return; - // Discard AA lines in Summon Night 5 - if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled()) - return; - } - - // This also makes skipping drawing very effective. - framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - drawEngine_.SetupVertexDecoder(gstate.vertType); // Do we still need to do this? - // Rough estimate, not sure what's correct. - cyclesExecuted += EstimatePerVertexCost() * count; - return; - } - - if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); - return; - } - - void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); - void *inds = 0; - u32 vertexType = gstate.vertType; - if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - u32 indexAddr = gstate_c.indexAddr; - if (!Memory::IsValidAddress(indexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr); - return; - } - inds = Memory::GetPointerUnchecked(indexAddr); - } - -#ifndef MOBILE_DEVICE - if (prim > GE_PRIM_RECTANGLES) { - ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim); - } -#endif - - if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { - vertexCost_ = EstimatePerVertexCost(); - } - gpuStats.vertexGPUCycles += vertexCost_ * count; - cyclesExecuted += vertexCost_* count; - - int bytesRead = 0; - UpdateUVScaleOffset(); - drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead); - - // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). - // Some games rely on this, they don't bother reloading VADDR and IADDR. - // The VADDR/IADDR registers are NOT updated. - AdvanceVerts(vertexType, count, bytesRead); -} - -void GPU_Vulkan::Execute_LoadClut(u32 op, u32 diff) { - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - textureCacheVulkan_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); -} - void GPU_Vulkan::InitDeviceObjects() { ILOG("GPU_Vulkan::InitDeviceObjects"); // Initialize framedata diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 63813331fb..266fe01485 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -36,7 +36,7 @@ public: ~GPU_Vulkan(); // This gets called on startup and when we get back from settings. - void CheckGPUFeatures(); + void CheckGPUFeatures() override; // These are where we can reset command buffers etc. void BeginHostFrame() override; @@ -54,23 +54,12 @@ public: void DoState(PointerWrap &p) override; void ClearShaderCache() override; - bool FramebufferDirty() override; - bool FramebufferReallyDirty() override; void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override { primaryInfo = reportingPrimaryInfo_; fullInfo = reportingFullInfo_; } - typedef void (GPU_Vulkan::*CmdFunc)(u32 op, u32 diff); - struct CommandInfo { - uint64_t flags; - GPU_Vulkan::CmdFunc func; - }; - - void Execute_Prim(u32 op, u32 diff); - void Execute_LoadClut(u32 op, u32 diff); - // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. std::vector DebugGetShaderIDs(DebugShaderType shader) override; std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; @@ -80,7 +69,6 @@ public: } protected: - void FastRunLoop(DisplayList &list) override; void FinishDeferred() override; private: @@ -93,13 +81,10 @@ private: void CopyDisplayToOutput() override; void Reinitialize() override; inline void UpdateVsyncInterval(bool force); - void UpdateCmdInfo(); void InitDeviceObjects(); void DestroyDeviceObjects(); - static CommandInfo cmdInfo_[256]; - VulkanContext *vulkan_; FramebufferManagerVulkan *framebufferManagerVulkan_; TextureCacheVulkan *textureCacheVulkan_; @@ -112,8 +97,6 @@ private: // Manages state and pipeline objects PipelineManagerVulkan *pipelineManager_; - int vertexCost_ = 0; - std::string reportingPrimaryInfo_; std::string reportingFullInfo_;