Add "Split syscall" mechanism

This commit is contained in:
Henrik Rydgård 2024-12-04 20:04:42 +01:00
parent 90d833be86
commit c25e6b3933
9 changed files with 122 additions and 49 deletions

View file

@ -59,6 +59,11 @@ enum
HLE_AFTER_SKIP_DEADBEEF = 0x40,
// Execute pending mips calls.
HLE_AFTER_QUEUED_CALLS = 0x80,
// Call CoreTiming::ForceCheck
HLE_AFTER_CORETIMING_FORCE_CHECK = 0x100,
// Split syscall over GE execution
HLE_SPLIT_SYSCALL_OVER_GE = 0x200,
HLE_SPLIT_SYSCALL_PART2 = 0x400,
};
static std::vector<HLEModule> moduleDB;
@ -69,6 +74,10 @@ static const HLEFunction *latestSyscall = nullptr;
static uint32_t latestSyscallPC = 0;
static int idleOp;
// Split syscall support. NOTE: This needs to be saved in DoState somehow!
static int splitSyscallEatCycles = 0;
struct HLEMipsCallInfo {
u32 func;
PSPAction *action;
@ -354,6 +363,10 @@ void hleSkipDeadbeef()
hleAfterSyscall |= HLE_AFTER_SKIP_DEADBEEF;
}
void hleCoreTimingForceCheck() {
hleAfterSyscall |= HLE_AFTER_CORETIMING_FORCE_CHECK;
}
// Pauses execution after an HLE call.
bool hleExecuteDebugBreak(const HLEFunction &func)
{
@ -399,8 +412,16 @@ u64 hleDelayResult(u64 result, const char *reason, int usec) {
}
void hleEatCycles(int cycles) {
// Maybe this should Idle, at least for larger delays? Could that cause issues?
currentMIPS->downcount -= cycles;
if (hleAfterSyscall & HLE_SPLIT_SYSCALL_OVER_GE) {
splitSyscallEatCycles = cycles;
} else {
// Maybe this should Idle, at least for larger delays? Could that cause issues?
currentMIPS->downcount -= cycles;
}
}
void hleSplitSyscallOverGe() {
hleAfterSyscall |= HLE_SPLIT_SYSCALL_OVER_GE;
}
void hleEatMicro(int usec) {
@ -562,8 +583,27 @@ inline static void SetDeadbeefRegs()
currentMIPS->hi = 0xDEADBEEF;
}
inline void hleFinishSyscall(const HLEFunction &info)
{
static void hleFinishSyscall(const HLEFunction *info) {
if (hleAfterSyscall & HLE_SPLIT_SYSCALL_OVER_GE) {
hleAfterSyscall &= ~HLE_SPLIT_SYSCALL_OVER_GE;
hleAfterSyscall |= HLE_SPLIT_SYSCALL_PART2;
// Switch to GE execution immediately.
// coreState is checked after the syscall, always.
Core_SwitchToGe();
return;
}
if (hleAfterSyscall & HLE_SPLIT_SYSCALL_PART2) {
// Eat the extra cycle we added above.
hleEatCycles(splitSyscallEatCycles + 1);
// Make sure to zero it so it's not accidentally re-used.
splitSyscallEatCycles = 0;
}
if (hleAfterSyscall & HLE_AFTER_CORETIMING_FORCE_CHECK) {
CoreTiming::ForceCheck();
}
if ((hleAfterSyscall & HLE_AFTER_SKIP_DEADBEEF) == 0)
SetDeadbeefRegs();
@ -580,9 +620,9 @@ inline void hleFinishSyscall(const HLEFunction &info)
else if ((hleAfterSyscall & HLE_AFTER_RESCHED) != 0)
__KernelReSchedule(hleAfterSyscallReschedReason);
if ((hleAfterSyscall & HLE_AFTER_DEBUG_BREAK) != 0)
{
if (!hleExecuteDebugBreak(info))
if ((hleAfterSyscall & HLE_AFTER_DEBUG_BREAK) != 0) {
_dbg_assert_(info);
if (!hleExecuteDebugBreak(*info))
{
// We'll do it next syscall.
hleAfterSyscall = HLE_AFTER_DEBUG_BREAK;
@ -595,6 +635,10 @@ inline void hleFinishSyscall(const HLEFunction &info)
hleAfterSyscallReschedReason = 0;
}
void hleFinishSyscallAfterGe() {
hleFinishSyscall(nullptr);
}
static void updateSyscallStats(int modulenum, int funcnum, double total)
{
const char *name = moduleDB[modulenum].funcTable[funcnum].name;
@ -653,7 +697,7 @@ inline void CallSyscallWithFlags(const HLEFunction *info)
}
if (hleAfterSyscall != HLE_AFTER_NOTHING)
hleFinishSyscall(*info);
hleFinishSyscall(info);
else
SetDeadbeefRegs();
}
@ -665,7 +709,7 @@ inline void CallSyscallWithoutFlags(const HLEFunction *info)
info->func();
if (hleAfterSyscall != HLE_AFTER_NOTHING)
hleFinishSyscall(*info);
hleFinishSyscall(info);
else
SetDeadbeefRegs();
}

View file

@ -129,6 +129,15 @@ u64 hleDelayResult(u64 result, const char *reason, int usec);
void hleEatCycles(int cycles);
void hleEatMicro(int usec);
void hleCoreTimingForceCheck();
// Causes the syscall to not fully execute immediately, instead give the Ge a chance to
// execute display lists.
void hleSplitSyscallOverGe();
// Called after a split syscall from System.cpp
void hleFinishSyscallAfterGe();
inline int hleDelayResult(int result, const char *reason, int usec) {
return hleDelayResult((u32) result, reason, usec);
}

View file

@ -145,7 +145,7 @@ public:
// Hm. This might be really tricky to get to behave the same in both modes. Here we are in __KernelReschedule, CoreTiming::Advance, ProcessEvents, GeExecuteInterrupt, ... .... __RunOnePendingInterrupt
// But not sure how much it will matter. The test pause2 hits here.
gpu->RunGe();
gpu->ProcessDLQueue();
return false;
}
@ -181,8 +181,11 @@ public:
}
gpu->InterruptEnd(intrdata.listid);
// This is the last thing done here in the syscall (__KernelReturnFromInterrupt) so switching coreState should just work.
gpu->RunGe();
// TODO: This is called from __KernelReturnFromInterrupt which does a bunch of stuff afterwards.
// Using hleSplitSyscallOverGe here breaks the gpu/signals/suspend.prx test, for that reason.
// So we just process inline and sacrifice debuggability a little.
gpu->ProcessDLQueue();
}
};
@ -349,12 +352,15 @@ u32 sceGeListEnQueue(u32 listAddress, u32 stallAddress, int callbackId, u32 optP
if ((int)listID >= 0)
listID = LIST_ID_MAGIC ^ listID;
if (runList) {
gpu->RunGe();
if (gpu->ShouldSplitOverGe()) {
hleSplitSyscallOverGe();
} else {
gpu->ProcessDLQueue();
}
}
// The stuff here below must be deferred...
hleEatCycles(490);
CoreTiming::ForceCheck();
return hleLogSuccessX(Log::sceGe, listID);
hleCoreTimingForceCheck();
return listID; // We already logged above, logs get confusing if we use hleLogSuccess.
}
u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, u32 optParamAddr) {
@ -368,11 +374,15 @@ u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, u32
if ((int)listID >= 0)
listID = LIST_ID_MAGIC ^ listID;
if (runList) {
gpu->RunGe();
if (gpu->ShouldSplitOverGe()) {
hleSplitSyscallOverGe();
} else {
gpu->ProcessDLQueue();
}
}
hleEatCycles(480);
CoreTiming::ForceCheck();
return hleLogSuccessX(Log::sceGe, listID);
hleCoreTimingForceCheck();
return listID; // We already logged above, logs get confusing if we use hleLogSuccess.
}
static int sceGeListDeQueue(u32 listID) {
@ -386,13 +396,17 @@ static int sceGeListUpdateStallAddr(u32 displayListID, u32 stallAddress) {
// Advance() might cause an interrupt, so defer the Advance but do it ASAP.
// Final Fantasy Type-0 has a graphical artifact without this (timing issue.)
hleEatCycles(190);
CoreTiming::ForceCheck();
hleCoreTimingForceCheck();
DEBUG_LOG(Log::sceGe, "sceGeListUpdateStallAddr(dlid=%i, stalladdr=%08x)", displayListID, stallAddress);
bool runList;
int retval = gpu->UpdateStall(LIST_ID_MAGIC ^ displayListID, stallAddress, &runList);
if (runList) {
gpu->RunGe();
if (gpu->ShouldSplitOverGe()) {
hleSplitSyscallOverGe();
} else {
gpu->ProcessDLQueue();
}
}
return retval;
}
@ -419,7 +433,11 @@ static int sceGeContinue() {
bool runList;
int ret = gpu->Continue(&runList);
if (runList) {
gpu->RunGe();
if (gpu->ShouldSplitOverGe()) {
hleSplitSyscallOverGe();
} else {
gpu->ProcessDLQueue();
}
}
hleEatCycles(220);
hleReSchedule("ge continue");

View file

@ -336,6 +336,7 @@ int MIPSState::RunLoopUntil(u64 globalTicks) {
case CPUCore::IR_INTERPRETER:
while (inDelaySlot) {
// We must get out of the delay slot before going into jit.
// This normally should never take more than one step...
SingleStep();
}
insideJit = true;

View file

@ -642,8 +642,14 @@ void PSP_RunLoopUntil(u64 globalticks) {
_dbg_assert_(false);
break;
case CORE_RUNNING_GE:
gpu->ProcessDLQueue(true);
coreState = CORE_RUNNING_CPU;
switch (gpu->ProcessDLQueue()) {
case DLResult::Error: // TODO: shouldn't return this normally
case DLResult::Pause: // like updatestall.
case DLResult::Done:
hleFinishSyscallAfterGe();
coreState = CORE_RUNNING_CPU;
break;
}
break;
}
}

View file

@ -337,7 +337,8 @@ void DumpExecute::SyncStall() {
bool runList;
gpu->UpdateStall(execListID, execListPos, &runList);
if (runList) {
gpu->RunGe();
DLResult result = gpu->ProcessDLQueue();
_dbg_assert_(result == DLResult::Done || result == DLResult::Pause);
}
s64 listTicks = gpu->GetListTicks(execListID);
if (listTicks != -1) {
@ -372,7 +373,7 @@ bool DumpExecute::SubmitCmds(const void *p, u32 sz) {
bool runList;
execListID = gpu->EnqueueList(execListBuf, execListPos, -1, optParam, false, &runList);
if (runList) {
gpu->RunGe();
gpu->ProcessDLQueue();
}
gpu->EnableInterrupts(true);
}

View file

@ -552,19 +552,6 @@ u32 GPUCommon::Continue(bool *runList) {
return 0;
}
void GPUCommon::RunGe() {
// Old method, although may make sense for performance if the ImDebugger isn't active.
#if 1
// Call ProcessDLQueue directly.
ProcessDLQueue(false);
#else
// New method, will allow ImDebugger to step the GPU.
// ARGH, what makes this different appears to be what happens AFTER the call to
// EnqueueList inside sceGeListEnqueue. Like the cycle eating and CoreTiming forcecheck.
Core_SwitchToGe();
#endif
}
u32 GPUCommon::Break(int mode) {
if (mode < 0 || mode > 1)
return SCE_KERNEL_ERROR_INVALID_MODE;
@ -848,7 +835,7 @@ int GPUCommon::GetNextListIndex() {
// This is now called when coreState == CORE_RUNNING_GE.
// TODO: It should return the next action.. (break into debugger or continue running)
DLResult GPUCommon::ProcessDLQueue(bool fromCore) {
DLResult GPUCommon::ProcessDLQueue() {
startingTicks = CoreTiming::GetTicks();
cyclesExecuted = 0;
@ -884,15 +871,15 @@ DLResult GPUCommon::ProcessDLQueue(bool fromCore) {
__GeTriggerSync(GPU_SYNC_DRAW, 1, drawCompleteTicks);
// Since the event is in CoreTiming, we're in sync. Just set 0 now.
if (fromCore) {
// Now update the core timing like we would have previously...
// TODO
}
return DLResult::Done;
}
bool GPUCommon::ShouldSplitOverGe() const {
// TODO: Should check for debugger active, etc.
// We only need to do this if we want to step through Ge display lists using the Ge debuggers.
return false;
}
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
gstate_c.offsetAddr = op << 8;
}

View file

@ -243,7 +243,7 @@ public:
bool InterpretList(DisplayList &list);
DLResult ProcessDLQueue(bool fromCore);
DLResult ProcessDLQueue();
u32 UpdateStall(int listid, u32 newstall, bool *runList);
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head, bool *runList);
@ -266,7 +266,9 @@ public:
virtual void DeviceLost() = 0;
virtual void DeviceRestore(Draw::DrawContext *draw) = 0;
void RunGe();
// Returns true if we should split the call across GE execution.
// For example, a debugger is active.
bool ShouldSplitOverGe() const;
void DrawImGuiDebugger();

View file

@ -255,7 +255,12 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, const
if (coreState == CORE_STEPPING_CPU && !coreParameter.startBreak) {
break;
}
if (time_now_d() > deadline) {
bool debugger = false;
#ifdef _WIN32
if (IsDebuggerPresent())
debugger = true;
#endif
if (time_now_d() > deadline && !debugger) {
// Don't compare, print the output at least up to this point, and bail.
if (!opt.bench) {
printf("%s", output.c_str());