mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Add "Split syscall" mechanism
This commit is contained in:
parent
90d833be86
commit
c25e6b3933
9 changed files with 122 additions and 49 deletions
|
@ -59,6 +59,11 @@ enum
|
|||
HLE_AFTER_SKIP_DEADBEEF = 0x40,
|
||||
// Execute pending mips calls.
|
||||
HLE_AFTER_QUEUED_CALLS = 0x80,
|
||||
// Call CoreTiming::ForceCheck
|
||||
HLE_AFTER_CORETIMING_FORCE_CHECK = 0x100,
|
||||
// Split syscall over GE execution
|
||||
HLE_SPLIT_SYSCALL_OVER_GE = 0x200,
|
||||
HLE_SPLIT_SYSCALL_PART2 = 0x400,
|
||||
};
|
||||
|
||||
static std::vector<HLEModule> moduleDB;
|
||||
|
@ -69,6 +74,10 @@ static const HLEFunction *latestSyscall = nullptr;
|
|||
static uint32_t latestSyscallPC = 0;
|
||||
static int idleOp;
|
||||
|
||||
// Split syscall support. NOTE: This needs to be saved in DoState somehow!
|
||||
static int splitSyscallEatCycles = 0;
|
||||
|
||||
|
||||
struct HLEMipsCallInfo {
|
||||
u32 func;
|
||||
PSPAction *action;
|
||||
|
@ -354,6 +363,10 @@ void hleSkipDeadbeef()
|
|||
hleAfterSyscall |= HLE_AFTER_SKIP_DEADBEEF;
|
||||
}
|
||||
|
||||
void hleCoreTimingForceCheck() {
|
||||
hleAfterSyscall |= HLE_AFTER_CORETIMING_FORCE_CHECK;
|
||||
}
|
||||
|
||||
// Pauses execution after an HLE call.
|
||||
bool hleExecuteDebugBreak(const HLEFunction &func)
|
||||
{
|
||||
|
@ -399,8 +412,16 @@ u64 hleDelayResult(u64 result, const char *reason, int usec) {
|
|||
}
|
||||
|
||||
void hleEatCycles(int cycles) {
|
||||
// Maybe this should Idle, at least for larger delays? Could that cause issues?
|
||||
currentMIPS->downcount -= cycles;
|
||||
if (hleAfterSyscall & HLE_SPLIT_SYSCALL_OVER_GE) {
|
||||
splitSyscallEatCycles = cycles;
|
||||
} else {
|
||||
// Maybe this should Idle, at least for larger delays? Could that cause issues?
|
||||
currentMIPS->downcount -= cycles;
|
||||
}
|
||||
}
|
||||
|
||||
void hleSplitSyscallOverGe() {
|
||||
hleAfterSyscall |= HLE_SPLIT_SYSCALL_OVER_GE;
|
||||
}
|
||||
|
||||
void hleEatMicro(int usec) {
|
||||
|
@ -562,8 +583,27 @@ inline static void SetDeadbeefRegs()
|
|||
currentMIPS->hi = 0xDEADBEEF;
|
||||
}
|
||||
|
||||
inline void hleFinishSyscall(const HLEFunction &info)
|
||||
{
|
||||
static void hleFinishSyscall(const HLEFunction *info) {
|
||||
if (hleAfterSyscall & HLE_SPLIT_SYSCALL_OVER_GE) {
|
||||
hleAfterSyscall &= ~HLE_SPLIT_SYSCALL_OVER_GE;
|
||||
hleAfterSyscall |= HLE_SPLIT_SYSCALL_PART2;
|
||||
// Switch to GE execution immediately.
|
||||
// coreState is checked after the syscall, always.
|
||||
Core_SwitchToGe();
|
||||
return;
|
||||
}
|
||||
|
||||
if (hleAfterSyscall & HLE_SPLIT_SYSCALL_PART2) {
|
||||
// Eat the extra cycle we added above.
|
||||
hleEatCycles(splitSyscallEatCycles + 1);
|
||||
// Make sure to zero it so it's not accidentally re-used.
|
||||
splitSyscallEatCycles = 0;
|
||||
}
|
||||
|
||||
if (hleAfterSyscall & HLE_AFTER_CORETIMING_FORCE_CHECK) {
|
||||
CoreTiming::ForceCheck();
|
||||
}
|
||||
|
||||
if ((hleAfterSyscall & HLE_AFTER_SKIP_DEADBEEF) == 0)
|
||||
SetDeadbeefRegs();
|
||||
|
||||
|
@ -580,9 +620,9 @@ inline void hleFinishSyscall(const HLEFunction &info)
|
|||
else if ((hleAfterSyscall & HLE_AFTER_RESCHED) != 0)
|
||||
__KernelReSchedule(hleAfterSyscallReschedReason);
|
||||
|
||||
if ((hleAfterSyscall & HLE_AFTER_DEBUG_BREAK) != 0)
|
||||
{
|
||||
if (!hleExecuteDebugBreak(info))
|
||||
if ((hleAfterSyscall & HLE_AFTER_DEBUG_BREAK) != 0) {
|
||||
_dbg_assert_(info);
|
||||
if (!hleExecuteDebugBreak(*info))
|
||||
{
|
||||
// We'll do it next syscall.
|
||||
hleAfterSyscall = HLE_AFTER_DEBUG_BREAK;
|
||||
|
@ -595,6 +635,10 @@ inline void hleFinishSyscall(const HLEFunction &info)
|
|||
hleAfterSyscallReschedReason = 0;
|
||||
}
|
||||
|
||||
void hleFinishSyscallAfterGe() {
|
||||
hleFinishSyscall(nullptr);
|
||||
}
|
||||
|
||||
static void updateSyscallStats(int modulenum, int funcnum, double total)
|
||||
{
|
||||
const char *name = moduleDB[modulenum].funcTable[funcnum].name;
|
||||
|
@ -653,7 +697,7 @@ inline void CallSyscallWithFlags(const HLEFunction *info)
|
|||
}
|
||||
|
||||
if (hleAfterSyscall != HLE_AFTER_NOTHING)
|
||||
hleFinishSyscall(*info);
|
||||
hleFinishSyscall(info);
|
||||
else
|
||||
SetDeadbeefRegs();
|
||||
}
|
||||
|
@ -665,7 +709,7 @@ inline void CallSyscallWithoutFlags(const HLEFunction *info)
|
|||
info->func();
|
||||
|
||||
if (hleAfterSyscall != HLE_AFTER_NOTHING)
|
||||
hleFinishSyscall(*info);
|
||||
hleFinishSyscall(info);
|
||||
else
|
||||
SetDeadbeefRegs();
|
||||
}
|
||||
|
|
|
@ -129,6 +129,15 @@ u64 hleDelayResult(u64 result, const char *reason, int usec);
|
|||
void hleEatCycles(int cycles);
|
||||
void hleEatMicro(int usec);
|
||||
|
||||
void hleCoreTimingForceCheck();
|
||||
|
||||
// Causes the syscall to not fully execute immediately, instead give the Ge a chance to
|
||||
// execute display lists.
|
||||
void hleSplitSyscallOverGe();
|
||||
|
||||
// Called after a split syscall from System.cpp
|
||||
void hleFinishSyscallAfterGe();
|
||||
|
||||
inline int hleDelayResult(int result, const char *reason, int usec) {
|
||||
return hleDelayResult((u32) result, reason, usec);
|
||||
}
|
||||
|
|
|
@ -145,7 +145,7 @@ public:
|
|||
|
||||
// Hm. This might be really tricky to get to behave the same in both modes. Here we are in __KernelReschedule, CoreTiming::Advance, ProcessEvents, GeExecuteInterrupt, ... .... __RunOnePendingInterrupt
|
||||
// But not sure how much it will matter. The test pause2 hits here.
|
||||
gpu->RunGe();
|
||||
gpu->ProcessDLQueue();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -181,8 +181,11 @@ public:
|
|||
}
|
||||
|
||||
gpu->InterruptEnd(intrdata.listid);
|
||||
// This is the last thing done here in the syscall (__KernelReturnFromInterrupt) so switching coreState should just work.
|
||||
gpu->RunGe();
|
||||
|
||||
// TODO: This is called from __KernelReturnFromInterrupt which does a bunch of stuff afterwards.
|
||||
// Using hleSplitSyscallOverGe here breaks the gpu/signals/suspend.prx test, for that reason.
|
||||
// So we just process inline and sacrifice debuggability a little.
|
||||
gpu->ProcessDLQueue();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -349,12 +352,15 @@ u32 sceGeListEnQueue(u32 listAddress, u32 stallAddress, int callbackId, u32 optP
|
|||
if ((int)listID >= 0)
|
||||
listID = LIST_ID_MAGIC ^ listID;
|
||||
if (runList) {
|
||||
gpu->RunGe();
|
||||
if (gpu->ShouldSplitOverGe()) {
|
||||
hleSplitSyscallOverGe();
|
||||
} else {
|
||||
gpu->ProcessDLQueue();
|
||||
}
|
||||
}
|
||||
// The stuff here below must be deferred...
|
||||
hleEatCycles(490);
|
||||
CoreTiming::ForceCheck();
|
||||
return hleLogSuccessX(Log::sceGe, listID);
|
||||
hleCoreTimingForceCheck();
|
||||
return listID; // We already logged above, logs get confusing if we use hleLogSuccess.
|
||||
}
|
||||
|
||||
u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, u32 optParamAddr) {
|
||||
|
@ -368,11 +374,15 @@ u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, u32
|
|||
if ((int)listID >= 0)
|
||||
listID = LIST_ID_MAGIC ^ listID;
|
||||
if (runList) {
|
||||
gpu->RunGe();
|
||||
if (gpu->ShouldSplitOverGe()) {
|
||||
hleSplitSyscallOverGe();
|
||||
} else {
|
||||
gpu->ProcessDLQueue();
|
||||
}
|
||||
}
|
||||
hleEatCycles(480);
|
||||
CoreTiming::ForceCheck();
|
||||
return hleLogSuccessX(Log::sceGe, listID);
|
||||
hleCoreTimingForceCheck();
|
||||
return listID; // We already logged above, logs get confusing if we use hleLogSuccess.
|
||||
}
|
||||
|
||||
static int sceGeListDeQueue(u32 listID) {
|
||||
|
@ -386,13 +396,17 @@ static int sceGeListUpdateStallAddr(u32 displayListID, u32 stallAddress) {
|
|||
// Advance() might cause an interrupt, so defer the Advance but do it ASAP.
|
||||
// Final Fantasy Type-0 has a graphical artifact without this (timing issue.)
|
||||
hleEatCycles(190);
|
||||
CoreTiming::ForceCheck();
|
||||
hleCoreTimingForceCheck();
|
||||
|
||||
DEBUG_LOG(Log::sceGe, "sceGeListUpdateStallAddr(dlid=%i, stalladdr=%08x)", displayListID, stallAddress);
|
||||
bool runList;
|
||||
int retval = gpu->UpdateStall(LIST_ID_MAGIC ^ displayListID, stallAddress, &runList);
|
||||
if (runList) {
|
||||
gpu->RunGe();
|
||||
if (gpu->ShouldSplitOverGe()) {
|
||||
hleSplitSyscallOverGe();
|
||||
} else {
|
||||
gpu->ProcessDLQueue();
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
@ -419,7 +433,11 @@ static int sceGeContinue() {
|
|||
bool runList;
|
||||
int ret = gpu->Continue(&runList);
|
||||
if (runList) {
|
||||
gpu->RunGe();
|
||||
if (gpu->ShouldSplitOverGe()) {
|
||||
hleSplitSyscallOverGe();
|
||||
} else {
|
||||
gpu->ProcessDLQueue();
|
||||
}
|
||||
}
|
||||
hleEatCycles(220);
|
||||
hleReSchedule("ge continue");
|
||||
|
|
|
@ -336,6 +336,7 @@ int MIPSState::RunLoopUntil(u64 globalTicks) {
|
|||
case CPUCore::IR_INTERPRETER:
|
||||
while (inDelaySlot) {
|
||||
// We must get out of the delay slot before going into jit.
|
||||
// This normally should never take more than one step...
|
||||
SingleStep();
|
||||
}
|
||||
insideJit = true;
|
||||
|
|
|
@ -642,8 +642,14 @@ void PSP_RunLoopUntil(u64 globalticks) {
|
|||
_dbg_assert_(false);
|
||||
break;
|
||||
case CORE_RUNNING_GE:
|
||||
gpu->ProcessDLQueue(true);
|
||||
coreState = CORE_RUNNING_CPU;
|
||||
switch (gpu->ProcessDLQueue()) {
|
||||
case DLResult::Error: // TODO: shouldn't return this normally
|
||||
case DLResult::Pause: // like updatestall.
|
||||
case DLResult::Done:
|
||||
hleFinishSyscallAfterGe();
|
||||
coreState = CORE_RUNNING_CPU;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -337,7 +337,8 @@ void DumpExecute::SyncStall() {
|
|||
bool runList;
|
||||
gpu->UpdateStall(execListID, execListPos, &runList);
|
||||
if (runList) {
|
||||
gpu->RunGe();
|
||||
DLResult result = gpu->ProcessDLQueue();
|
||||
_dbg_assert_(result == DLResult::Done || result == DLResult::Pause);
|
||||
}
|
||||
s64 listTicks = gpu->GetListTicks(execListID);
|
||||
if (listTicks != -1) {
|
||||
|
@ -372,7 +373,7 @@ bool DumpExecute::SubmitCmds(const void *p, u32 sz) {
|
|||
bool runList;
|
||||
execListID = gpu->EnqueueList(execListBuf, execListPos, -1, optParam, false, &runList);
|
||||
if (runList) {
|
||||
gpu->RunGe();
|
||||
gpu->ProcessDLQueue();
|
||||
}
|
||||
gpu->EnableInterrupts(true);
|
||||
}
|
||||
|
|
|
@ -552,19 +552,6 @@ u32 GPUCommon::Continue(bool *runList) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
void GPUCommon::RunGe() {
|
||||
// Old method, although may make sense for performance if the ImDebugger isn't active.
|
||||
#if 1
|
||||
// Call ProcessDLQueue directly.
|
||||
ProcessDLQueue(false);
|
||||
#else
|
||||
// New method, will allow ImDebugger to step the GPU.
|
||||
// ARGH, what makes this different appears to be what happens AFTER the call to
|
||||
// EnqueueList inside sceGeListEnqueue. Like the cycle eating and CoreTiming forcecheck.
|
||||
Core_SwitchToGe();
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 GPUCommon::Break(int mode) {
|
||||
if (mode < 0 || mode > 1)
|
||||
return SCE_KERNEL_ERROR_INVALID_MODE;
|
||||
|
@ -848,7 +835,7 @@ int GPUCommon::GetNextListIndex() {
|
|||
|
||||
// This is now called when coreState == CORE_RUNNING_GE.
|
||||
// TODO: It should return the next action.. (break into debugger or continue running)
|
||||
DLResult GPUCommon::ProcessDLQueue(bool fromCore) {
|
||||
DLResult GPUCommon::ProcessDLQueue() {
|
||||
startingTicks = CoreTiming::GetTicks();
|
||||
cyclesExecuted = 0;
|
||||
|
||||
|
@ -884,15 +871,15 @@ DLResult GPUCommon::ProcessDLQueue(bool fromCore) {
|
|||
|
||||
__GeTriggerSync(GPU_SYNC_DRAW, 1, drawCompleteTicks);
|
||||
// Since the event is in CoreTiming, we're in sync. Just set 0 now.
|
||||
|
||||
if (fromCore) {
|
||||
// Now update the core timing like we would have previously...
|
||||
// TODO
|
||||
}
|
||||
|
||||
return DLResult::Done;
|
||||
}
|
||||
|
||||
bool GPUCommon::ShouldSplitOverGe() const {
|
||||
// TODO: Should check for debugger active, etc.
|
||||
// We only need to do this if we want to step through Ge display lists using the Ge debuggers.
|
||||
return false;
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
|
||||
gstate_c.offsetAddr = op << 8;
|
||||
}
|
||||
|
|
|
@ -243,7 +243,7 @@ public:
|
|||
|
||||
bool InterpretList(DisplayList &list);
|
||||
|
||||
DLResult ProcessDLQueue(bool fromCore);
|
||||
DLResult ProcessDLQueue();
|
||||
|
||||
u32 UpdateStall(int listid, u32 newstall, bool *runList);
|
||||
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head, bool *runList);
|
||||
|
@ -266,7 +266,9 @@ public:
|
|||
virtual void DeviceLost() = 0;
|
||||
virtual void DeviceRestore(Draw::DrawContext *draw) = 0;
|
||||
|
||||
void RunGe();
|
||||
// Returns true if we should split the call across GE execution.
|
||||
// For example, a debugger is active.
|
||||
bool ShouldSplitOverGe() const;
|
||||
|
||||
void DrawImGuiDebugger();
|
||||
|
||||
|
|
|
@ -255,7 +255,12 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, const
|
|||
if (coreState == CORE_STEPPING_CPU && !coreParameter.startBreak) {
|
||||
break;
|
||||
}
|
||||
if (time_now_d() > deadline) {
|
||||
bool debugger = false;
|
||||
#ifdef _WIN32
|
||||
if (IsDebuggerPresent())
|
||||
debugger = true;
|
||||
#endif
|
||||
if (time_now_d() > deadline && !debugger) {
|
||||
// Don't compare, print the output at least up to this point, and bail.
|
||||
if (!opt.bench) {
|
||||
printf("%s", output.c_str());
|
||||
|
|
Loading…
Add table
Reference in a new issue