From 9ed8d8871e881fdfe9c1de81be7042a1fd94c049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 3 Dec 2024 08:55:13 +0100 Subject: [PATCH] Ge stepping without wait: Tex/Prim stepping works. Draw/Single have issues. --- Core/Core.cpp | 34 +---------- Core/Core.h | 1 - Core/HW/Display.h | 2 +- Core/System.cpp | 22 +++++-- GPU/Common/GPUDebugInterface.h | 6 +- GPU/Debugger/Debugger.cpp | 19 +++--- GPU/Debugger/Debugger.h | 10 +++- GPU/Debugger/Stepping.cpp | 67 +++++++++------------- GPU/Debugger/Stepping.h | 9 +-- GPU/GPUCommon.cpp | 31 ++++++++-- GPU/GPUCommon.h | 5 +- GPU/GPUDefinitions.h | 3 +- UI/EmuScreen.cpp | 1 + Windows/GEDebugger/CtrlDisplayListView.cpp | 2 +- 14 files changed, 105 insertions(+), 107 deletions(-) diff --git a/Core/Core.cpp b/Core/Core.cpp index 0960957bd4..f229a04249 100644 --- a/Core/Core.cpp +++ b/Core/Core.cpp @@ -73,18 +73,6 @@ struct CPUStepCommand { static CPUStepCommand g_cpuStepCommand; -struct GeStepCommand { - CPUStepType type; - bool empty() const { - return type == CPUStepType::None; - } - void clear() { - type = CPUStepType::None; - } -}; - -static GeStepCommand g_geStepCommand; - // This is so that external threads can wait for the CPU to become inactive. static std::condition_variable m_InactiveCond; static std::mutex m_hInactiveMutex; @@ -183,16 +171,6 @@ bool Core_RequestCPUStep(CPUStepType type, int stepSize) { return true; } -bool Core_RequestGeStep(CPUStepType type) { - std::lock_guard guard(g_stepMutex); - if (g_geStepCommand.type != CPUStepType::None) { - ERROR_LOG(Log::CPU, "Can't submit two steps in one host frame"); - return false; - } - g_geStepCommand = { type }; - return true; -} - // Handles more advanced step types (used by the debugger). // stepSize is to support stepping through compound instructions like fused lui+ladd (li). // Yes, our disassembler does support those. @@ -308,7 +286,7 @@ void Core_ProcessStepping(MIPSDebugInterface *cpu) { // Or any GPU actions. // Legacy stepping code. - GPUStepping::SingleStep(); + GPUStepping::ProcessStepping(); // We're not inside jit now, so it's safe to clear the breakpoints. static int lastSteppingCounter = -1; @@ -322,16 +300,6 @@ void Core_ProcessStepping(MIPSDebugInterface *cpu) { // Need to check inside the lock to avoid races. std::lock_guard guard(g_stepMutex); - if (coreState == CORE_STEPPING_GE) { - if (!g_geStepCommand.empty()) { - Core_PerformGeStep(g_geStepCommand.type); - // System_Notify(SystemNotification::) - g_geStepCommand.clear(); - steppingCounter++; - } - return; - } - if (coreState != CORE_STEPPING_CPU || g_cpuStepCommand.empty()) { return; } diff --git a/Core/Core.h b/Core/Core.h index 0df85039b3..dbfa0376ea 100644 --- a/Core/Core.h +++ b/Core/Core.h @@ -52,7 +52,6 @@ void Core_Resume(); // This should be called externally. // Can fail if another step type was requested this frame. bool Core_RequestCPUStep(CPUStepType stepType, int stepSize); -bool Core_RequestGeStep(CPUStepType stepType); bool Core_ShouldRunBehind(); bool Core_MustRunBehind(); diff --git a/Core/HW/Display.h b/Core/HW/Display.h index 241b43ec0d..1f0497c139 100644 --- a/Core/HW/Display.h +++ b/Core/HW/Display.h @@ -37,7 +37,7 @@ uint32_t __DisplayGetCurrentHcount(); uint32_t __DisplayGetAccumulatedHcount(); void DisplayAdjustAccumulatedHcount(uint32_t diff); -void __DisplayGetDebugStats(char stats[], size_t bufsize); +void __DisplayGetDebugStats(char *stats, size_t bufsize); void __DisplayGetAveragedFPS(float *out_vps, float *out_fps); void __DisplayGetFPS(float *out_vps, float *out_fps, float *out_actual_fps); void __DisplayGetVPS(float *out_vps); diff --git a/Core/System.cpp b/Core/System.cpp index eb9039df9f..4f60c4fc67 100644 --- a/Core/System.cpp +++ b/Core/System.cpp @@ -73,6 +73,7 @@ #include "Core/HLE/sceAudiocodec.h" #include "GPU/GPUState.h" #include "GPU/GPUCommon.h" +#include "GPU/Debugger/Stepping.h" #include "GPU/Debugger/RecordFormat.h" #include "Core/RetroAchievements.h" @@ -633,19 +634,30 @@ void PSP_RunLoopUntil(u64 globalticks) { case CORE_NEXTFRAME: return; case CORE_STEPPING_CPU: + case CORE_STEPPING_GE: Core_ProcessStepping(currentDebugMIPS); return; case CORE_RUNNING_CPU: mipsr4k.RunLoopUntil(globalticks); break; // Will loop around to go to RUNNING_GE or NEXTFRAME, which will exit. - case CORE_STEPPING_GE: - _dbg_assert_(false); - break; case CORE_RUNNING_GE: switch (gpu->ProcessDLQueue()) { - case DLResult::Error: // TODO: shouldn't return this normally - case DLResult::Pause: // like updatestall. + case DLResult::Break: + GPUStepping::EnterStepping(); + break; + case DLResult::Error: + // We should elegantly report the error, or I guess ignore it. + hleFinishSyscallAfterGe(); + coreState = CORE_RUNNING_CPU; + break; + case DLResult::Stall: case DLResult::Done: + // Done executing for now + hleFinishSyscallAfterGe(); + coreState = CORE_RUNNING_CPU; + break; + default: + _dbg_assert_(false); hleFinishSyscallAfterGe(); coreState = CORE_RUNNING_CPU; break; diff --git a/GPU/Common/GPUDebugInterface.h b/GPU/Common/GPUDebugInterface.h index 722672fdb6..f862689b84 100644 --- a/GPU/Common/GPUDebugInterface.h +++ b/GPU/Common/GPUDebugInterface.h @@ -205,10 +205,10 @@ public: virtual void ResetListStall(int listID, u32 stall) = 0; virtual void ResetListState(int listID, DisplayListState state) = 0; - GPUDebugOp DissassembleOp(u32 pc) { - return DissassembleOp(pc, Memory::Read_U32(pc)); + GPUDebugOp DisassembleOp(u32 pc) { + return DisassembleOp(pc, Memory::Read_U32(pc)); } - virtual GPUDebugOp DissassembleOp(u32 pc, u32 op) = 0; + virtual GPUDebugOp DisassembleOp(u32 pc, u32 op) = 0; virtual std::vector DissassembleOpRange(u32 startpc, u32 endpc) = 0; // Enter/exit stepping mode. Mainly for better debug stats on time taken. diff --git a/GPU/Debugger/Debugger.cpp b/GPU/Debugger/Debugger.cpp index 88c923b8ee..74adbcd96f 100644 --- a/GPU/Debugger/Debugger.cpp +++ b/GPU/Debugger/Debugger.cpp @@ -46,7 +46,6 @@ static void Init() { GPUBreakpoints::Init([](bool flag) { hasBreakpoints = flag; }); - Core_ListenStopRequest(&GPUStepping::ForceUnpause); inited = true; } } @@ -105,9 +104,12 @@ static bool IsBreakpoint(u32 pc, u32 op) { return false; } -bool NotifyCommand(u32 pc) { - if (!active) - return true; +NotifyResult NotifyCommand(u32 pc) { + if (!active) { + _dbg_assert_(false); + return NotifyResult::Skip; // return false + } + u32 op = Memory::ReadUnchecked_U32(pc); u32 cmd = op >> 24; if (thisFlipNum != gpuStats.numFlips) { @@ -136,22 +138,23 @@ bool NotifyCommand(u32 pc) { if (coreState == CORE_POWERDOWN || !gpuDebug) { breakNext = BreakNext::NONE; - return process; + return process ? NotifyResult::Execute : NotifyResult::Skip; } - auto info = gpuDebug->DissassembleOp(pc); + auto info = gpuDebug->DisassembleOp(pc); if (lastStepTime >= 0.0) { NOTICE_LOG(Log::G3D, "Waiting at %08x, %s (%fms)", pc, info.desc.c_str(), (time_now_d() - lastStepTime) * 1000.0); lastStepTime = -1.0; } else { NOTICE_LOG(Log::G3D, "Waiting at %08x, %s", pc, info.desc.c_str()); } - GPUStepping::EnterStepping(); + return NotifyResult::Break; // new. caller will call GPUStepping::EnterStepping(). } - return process; + return process ? NotifyResult::Execute : NotifyResult::Skip; } +// TODO: This mechanism isn't great. void NotifyDraw() { if (!active) return; diff --git a/GPU/Debugger/Debugger.h b/GPU/Debugger/Debugger.h index b17c6b4ad0..c41490ce16 100644 --- a/GPU/Debugger/Debugger.h +++ b/GPU/Debugger/Debugger.h @@ -40,8 +40,14 @@ bool IsActive(); void SetBreakNext(BreakNext next); void SetBreakCount(int c, bool relative = false); +enum class NotifyResult { + Execute, + Skip, + Break +}; + // While debugging is active, these may block. -bool NotifyCommand(u32 pc); +NotifyResult NotifyCommand(u32 pc); void NotifyDraw(); void NotifyDisplay(u32 framebuf, u32 stride, int format); void NotifyBeginFrame(); @@ -52,4 +58,4 @@ int PrimsLastFrame(); bool SetRestrictPrims(const char *rule); const char *GetRestrictPrims(); -} +} // namespace diff --git a/GPU/Debugger/Stepping.cpp b/GPU/Debugger/Stepping.cpp index 5774bfabe7..9e2296f91f 100644 --- a/GPU/Debugger/Stepping.cpp +++ b/GPU/Debugger/Stepping.cpp @@ -44,7 +44,6 @@ static bool isStepping; static int stepCounter = 0; static std::mutex pauseLock; -static std::condition_variable pauseWait; static PauseAction pauseAction = PAUSE_CONTINUE; static std::mutex actionLock; static std::condition_variable actionWait; @@ -76,12 +75,7 @@ static void SetPauseAction(PauseAction act, bool waitComplete = true) { // if (coreState == CORE_STEPPING && act != PAUSE_CONTINUE) // Core_UpdateSingleStep(); - actionComplete = false; - pauseWait.notify_all(); - while (waitComplete && !actionComplete) { - actionWait.wait(guard); - } } static void RunPauseAction() { @@ -152,17 +146,23 @@ static void StopStepping() { isStepping = false; } -bool SingleStep() { +bool ProcessStepping() { + _dbg_assert_(gpuDebug); + std::unique_lock guard(pauseLock); - if (coreState != CORE_RUNNING_CPU && coreState != CORE_NEXTFRAME && coreState != CORE_STEPPING_CPU) { - // Shutting down, don't try to step. + if (coreState != CORE_STEPPING_GE) { + // Not stepping any more, don't try. actionComplete = true; actionWait.notify_all(); return false; } - if (!gpuDebug || pauseAction == PAUSE_CONTINUE) { + + if (pauseAction == PAUSE_CONTINUE) { + // This is fine, can just mean to run to the next breakpoint/event. + INFO_LOG(Log::G3D, "Continuing..."); actionComplete = true; actionWait.notify_all(); + coreState = CORE_RUNNING_GE; return false; } @@ -173,36 +173,33 @@ bool SingleStep() { } bool EnterStepping() { - std::unique_lock guard(pauseLock); - if (coreState != CORE_RUNNING_CPU && coreState != CORE_NEXTFRAME && coreState != CORE_STEPPING_CPU) { - // Shutting down, don't try to step. - actionComplete = true; - actionWait.notify_all(); - return false; - } - if (!gpuDebug) { - actionComplete = true; - actionWait.notify_all(); - return false; - } + _dbg_assert_(gpuDebug); - StartStepping(); + std::unique_lock guard(pauseLock); + if (coreState == CORE_STEPPING_GE) { + // Already there. Should avoid this happening, I think. + return true; + } + if (coreState != CORE_RUNNING_CPU && coreState != CORE_RUNNING_GE) { + // ?? Shutting down, don't try to step. + actionComplete = true; + actionWait.notify_all(); + return false; + } // Just to be sure. if (pauseAction == PAUSE_CONTINUE) { pauseAction = PAUSE_BREAK; } - stepCounter++; - do { - RunPauseAction(); - pauseWait.wait(guard); - } while (pauseAction != PAUSE_CONTINUE); - - StopStepping(); + coreState = CORE_STEPPING_GE; return true; } +void ResumeFromStepping() { + SetPauseAction(PAUSE_CONTINUE, false); +} + bool IsStepping() { return isStepping; } @@ -268,16 +265,6 @@ bool GPU_FlushDrawing() { return true; } -void ResumeFromStepping() { - SetPauseAction(PAUSE_CONTINUE, false); -} - -void ForceUnpause() { - SetPauseAction(PAUSE_CONTINUE, false); - actionComplete = true; - actionWait.notify_all(); -} - GPUgstate LastState() { return lastGState; } diff --git a/GPU/Debugger/Stepping.h b/GPU/Debugger/Stepping.h index 4b3c5e5ab9..2d50ee7f4f 100644 --- a/GPU/Debugger/Stepping.h +++ b/GPU/Debugger/Stepping.h @@ -28,10 +28,14 @@ namespace GPUStepping { // Should be called from the emu thread. // Begins stepping and increments the stepping counter while inside a lock. bool EnterStepping(); - bool SingleStep(); bool IsStepping(); + void ResumeFromStepping(); + int GetSteppingCounter(); + // Called from the emu thread. + bool ProcessStepping(); + bool GPU_GetOutputFramebuffer(const GPUDebugBuffer *&buffer); bool GPU_GetCurrentFramebuffer(const GPUDebugBuffer *&buffer, GPUDebugFramebufferType type); bool GPU_GetCurrentDepthbuffer(const GPUDebugBuffer *&buffer); @@ -41,8 +45,5 @@ namespace GPUStepping { bool GPU_SetCmdValue(u32 op); bool GPU_FlushDrawing(); - void ResumeFromStepping(); - void ForceUnpause(); - GPUgstate LastState(); }; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index d264c524e2..575bda7aa1 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -678,7 +678,14 @@ bool GPUCommon::InterpretList(DisplayList &list) { if (useFastRunLoop) { FastRunLoop(list); } else { - SlowRunLoop(list); + if (!SlowRunLoop(list)) { + // Hit a breakpoint - I guess we bail? + // This will be intricate. + FinishDeferred(); + _dbg_assert_(!GPURecord::IsActive()); + gpuState = GPUSTATE_BREAK; + return false; + } } { @@ -726,11 +733,13 @@ void GPUCommon::PSPFrame() { GPURecord::NotifyBeginFrame(); } -void GPUCommon::SlowRunLoop(DisplayList &list) { +// Returns false on breakpoint. +bool GPUCommon::SlowRunLoop(DisplayList &list) { const bool dumpThisFrame = dumpThisFrame_; while (downcount > 0) { - bool process = GPUDebug::NotifyCommand(list.pc); - if (process) { + GPUDebug::NotifyResult result = GPUDebug::NotifyCommand(list.pc); + + if (result == GPUDebug::NotifyResult::Execute) { GPURecord::NotifyCommand(list.pc); u32 op = Memory::ReadUnchecked_U32(list.pc); u32 cmd = op >> 24; @@ -751,11 +760,14 @@ void GPUCommon::SlowRunLoop(DisplayList &list) { gstate.cmdmem[cmd] = op; ExecuteOp(op, diff); + } else if (result == GPUDebug::NotifyResult::Break) { + return false; } list.pc += 4; --downcount; } + return true; } // The newPC parameter is used for jumps, we don't count cycles between. @@ -849,7 +861,14 @@ DLResult GPUCommon::ProcessDLQueue() { DisplayList &l = dls[listIndex]; DEBUG_LOG(Log::G3D, "Starting DL execution at %08x - stall = %08x (startingTicks=%d)", l.pc, l.stall, startingTicks); if (!InterpretList(l)) { - return DLResult::Error; + switch (gpuState) { + case GPURunState::GPUSTATE_STALL: + return DLResult::Stall; + case GPURunState::GPUSTATE_BREAK: + return DLResult::Break; + default: + return DLResult::Error; + } } // Some other list could've taken the spot while we dilly-dallied around, so we need the check. @@ -1608,7 +1627,7 @@ void GPUCommon::ResetListState(int listID, DisplayListState state) { downcount = 0; } -GPUDebugOp GPUCommon::DissassembleOp(u32 pc, u32 op) { +GPUDebugOp GPUCommon::DisassembleOp(u32 pc, u32 op) { char buffer[1024]; u32 prev = Memory::IsValidAddress(pc - 4) ? Memory::ReadUnchecked_U32(pc - 4) : 0; GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer)); diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 3ea07bf0b4..ad96b8cf41 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -80,6 +80,7 @@ enum GPURunState { GPUSTATE_STALL = 2, GPUSTATE_INTERRUPT = 3, GPUSTATE_ERROR = 4, + GPUSTATE_BREAK = 5, }; enum GPUSyncType { @@ -353,7 +354,7 @@ public: void ResetListStall(int listID, u32 stall) override; void ResetListState(int listID, DisplayListState state) override; - GPUDebugOp DissassembleOp(u32 pc, u32 op) override; + GPUDebugOp DisassembleOp(u32 pc, u32 op) override; std::vector DissassembleOpRange(u32 startpc, u32 endpc) override; void NotifySteppingEnter() override; @@ -410,7 +411,7 @@ protected: virtual void CheckDepthUsage(VirtualFramebuffer *vfb) {} virtual void FastRunLoop(DisplayList &list) = 0; - void SlowRunLoop(DisplayList &list); + bool SlowRunLoop(DisplayList &list); // Returns false on breakpoint. void UpdatePC(u32 currentPC, u32 newPC); void UpdateState(GPURunState state); void FastLoadBoneMatrix(u32 target); diff --git a/GPU/GPUDefinitions.h b/GPU/GPUDefinitions.h index 5dc6d6d08a..d2ec4ff4a7 100644 --- a/GPU/GPUDefinitions.h +++ b/GPU/GPUDefinitions.h @@ -72,5 +72,6 @@ enum class DLStepType { enum class DLResult { Done, Error, - Pause, // used for stepping, breakpoints + Stall, + Break, // used for stepping, breakpoints }; diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index ada8b9a3f0..bc581fa1f5 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -1570,6 +1570,7 @@ ScreenRenderFlags EmuScreen::render(ScreenRenderMode mode) { flags |= ScreenRenderFlags::HANDLED_THROTTLING; break; case CORE_STEPPING_CPU: + case CORE_STEPPING_GE: case CORE_RUNTIME_ERROR: { // If there's an exception, display information. diff --git a/Windows/GEDebugger/CtrlDisplayListView.cpp b/Windows/GEDebugger/CtrlDisplayListView.cpp index cc859ee41c..a55e757c2d 100644 --- a/Windows/GEDebugger/CtrlDisplayListView.cpp +++ b/Windows/GEDebugger/CtrlDisplayListView.cpp @@ -341,7 +341,7 @@ void CtrlDisplayListView::onMouseUp(WPARAM wParam, LPARAM lParam, int button) char *p = temp, *end = temp + space; for (u32 pos = selectRangeStart; pos < selectRangeEnd && p < end; pos += instructionSize) { - GPUDebugOp op = gpuDebug->DissassembleOp(pos); + GPUDebugOp op = gpuDebug->DisassembleOp(pos); p += snprintf(p, end - p, "%s\r\n", op.desc.c_str()); }