From 57770dbd95590f16646b1efc7a6af780af83b366 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 7 Apr 2013 12:45:42 -0700 Subject: [PATCH 1/5] Delay GPU signals and waits to simulate cycles. --- Core/HLE/sceGe.cpp | 57 ++++++++++++++++++++++++++++++++++++---------- Core/HLE/sceGe.h | 5 +++- GPU/GPUCommon.cpp | 54 ++++++++++++++++++++++++------------------- GPU/GPUCommon.h | 3 ++- GPU/GPUInterface.h | 4 +++- 5 files changed, 84 insertions(+), 39 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index f56692b1ad..17ef2aaf71 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -19,6 +19,7 @@ #include "../MIPS/MIPS.h" #include "../System.h" #include "../CoreParameter.h" +#include "../CoreTiming.h" #include "../Reporting.h" #include "sceGe.h" #include "sceKernelMemory.h" @@ -37,6 +38,8 @@ struct GeInterruptData }; static std::list<GeInterruptData> ge_pending_cb; +static int geSyncEvent; +static int geInterruptEvent; class GeIntrHandler : public IntrHandler { @@ -100,7 +103,8 @@ public: ge_pending_cb.pop_front(); gpu->InterruptEnd(intrdata.listid); - WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid); + if (subintr >= 0) + WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid); return false; } @@ -137,11 +141,34 @@ public: } }; +void __GeExecuteSync(u64 userdata, int cyclesLate) +{ + int listid = userdata >> 32; + WaitType waitType = (WaitType) (userdata & 0xFFFFFFFF); + bool wokeThreads = __KernelTriggerWait(waitType, listid, 0, "GeSync", true); + gpu->SyncEnd(waitType, listid, wokeThreads); +} + +void __GeExecuteInterrupt(u64 userdata, int cyclesLate) +{ + int listid = userdata >> 32; + u32 pc = userdata & 0xFFFFFFFF; + + GeInterruptData intrdata; + intrdata.listid = listid; + intrdata.pc = pc; + ge_pending_cb.push_back(intrdata); + __TriggerInterrupt(PSP_INTR_HLE, PSP_GE_INTR, PSP_INTR_SUB_NONE); +} + void __GeInit() { memset(&ge_used_callbacks, 0, 
sizeof(ge_used_callbacks)); ge_pending_cb.clear(); __RegisterIntrHandler(PSP_GE_INTR, new GeIntrHandler()); + + geSyncEvent = CoreTiming::RegisterEvent("GeSyncEvent", &__GeExecuteSync); + geInterruptEvent = CoreTiming::RegisterEvent("GeInterruptEvent", &__GeExecuteInterrupt); } void __GeDoState(PointerWrap &p) @@ -149,6 +176,12 @@ void __GeDoState(PointerWrap &p) p.DoArray(ge_callback_data, ARRAY_SIZE(ge_callback_data)); p.DoArray(ge_used_callbacks, ARRAY_SIZE(ge_used_callbacks)); p.Do(ge_pending_cb); + + p.Do(geSyncEvent); + CoreTiming::RestoreRegisterEvent(geSyncEvent, "GeSyncEvent", &__GeExecuteSync); + p.Do(geInterruptEvent); + CoreTiming::RestoreRegisterEvent(geInterruptEvent, "GeInterruptEvent", &__GeExecuteInterrupt); + // Everything else is done in sceDisplay. p.DoMarker("sceGe"); } @@ -158,19 +191,19 @@ void __GeShutdown() } -bool __GeTriggerInterrupt(int listid, u32 pc) +// Warning: may be called from the GPU thread. +bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks) { - // ClaDun X2 does not expect sceGeListEnqueue to reschedule (which it does not on the PSP.) - // Once PPSSPP's GPU uses cycles, we can remove this check. - DisplayList* dl = gpu->getList(listid); - if (dl != NULL && dl->subIntrBase < 0) - return false; + u64 userdata = (u64)id << 32 | (u64) waitType; + CoreTiming::ScheduleEvent_Threadsafe(atTicks - CoreTiming::GetTicks(), geSyncEvent, userdata); + return true; +} - GeInterruptData intrdata; - intrdata.listid = listid; - intrdata.pc = pc; - ge_pending_cb.push_back(intrdata); - __TriggerInterrupt(PSP_INTR_HLE, PSP_GE_INTR, PSP_INTR_SUB_NONE); +// Warning: may be called from the GPU thread. 
+bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks) +{ + u64 userdata = (u64)listid << 32 | (u64) pc; + CoreTiming::ScheduleEvent_Threadsafe(atTicks - CoreTiming::GetTicks(), geInterruptEvent, userdata); return true; } diff --git a/Core/HLE/sceGe.h b/Core/HLE/sceGe.h index 7c82a98d5f..9659ed0b93 100644 --- a/Core/HLE/sceGe.h +++ b/Core/HLE/sceGe.h @@ -17,6 +17,8 @@ #pragma once +#include "Core/HLE/sceKernelThread.h" + #define SCE_GE_LIST_COMPLETED 0 #define SCE_GE_LIST_QUEUED 1 #define SCE_GE_LIST_DRAWING 2 @@ -39,7 +41,8 @@ void Register_sceGe_user(); void __GeInit(); void __GeDoState(PointerWrap &p); void __GeShutdown(); -bool __GeTriggerInterrupt(int listid, u32 pc); +bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks); +bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks); bool __GeHasPendingInterrupt(); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 46a9a91a60..bd6f3ef444 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -16,14 +16,14 @@ GPUCommon::GPUCommon() : currentList(NULL), isbreak(false), - drawComplete(true), + drawCompleteTicks(0), dumpNextFrame_(false), dumpThisFrame_(false), interruptsEnabled_(true) { for (int i = 0; i < DisplayListMaxCount; ++i) { dls[i].state = PSP_GE_DL_STATE_NONE; - dls[i].shouldWait = false; + dls[i].waitTicks = 0; } } @@ -47,7 +47,7 @@ u32 GPUCommon::DrawSync(int mode) { if (mode == 0) { // TODO: What if dispatch / interrupts disabled? 
- if (!drawComplete) { + if (drawCompleteTicks > CoreTiming::GetTicks()) { __KernelWaitCurThread(WAITTYPE_GEDRAWSYNC, 1, 0, 0, false, "GeDrawSync"); } else { for (int i = 0; i < DisplayListMaxCount; ++i) { @@ -116,7 +116,7 @@ int GPUCommon::ListSync(int listid, int mode) } } - if (dl.shouldWait) { + if (dl.waitTicks > CoreTiming::GetTicks()) { __KernelWaitCurThread(WAITTYPE_GELISTSYNC, listid, 0, 0, false, "GeListSync"); } return PSP_GE_LIST_COMPLETED; @@ -181,7 +181,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) dl.stackptr = 0; dl.signal = PSP_GE_SIGNAL_NONE; dl.interrupted = false; - dl.shouldWait = true; + dl.waitTicks = (u64)-1; if (head) { if (currentList) { @@ -202,7 +202,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) currentList = &dl; dlQueue.push_front(id); - drawComplete = false; + drawCompleteTicks = (u64)-1; // TODO save context when starting the list if param is set ProcessDLQueue(); @@ -226,7 +226,7 @@ u32 GPUCommon::DequeueList(int listid) else dlQueue.remove(listid); - dls[listid].shouldWait = false; + dls[listid].waitTicks = 0; __KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync"); CheckDrawSync(); @@ -265,7 +265,7 @@ u32 GPUCommon::Continue() // TODO Restore BASE // We have a list now, so it's not complete. - drawComplete = false; + drawCompleteTicks = (u64)-1; } else currentList->state = PSP_GE_DL_STATE_QUEUED; @@ -453,15 +453,8 @@ bool GPUCommon::ProcessDLQueue() } currentList = NULL; - drawComplete = true; - if (__KernelTriggerWait(WAITTYPE_GEDRAWSYNC, 1, 0, "GeDrawSync")) - { - for (int i = 0; i < DisplayListMaxCount; ++i) { - if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) { - dls[i].state = PSP_GE_DL_STATE_NONE; - } - } - } + drawCompleteTicks = startingTicks + cyclesExecuted; + __GeTriggerSync(WAITTYPE_GEDRAWSYNC, 1, drawCompleteTicks); return true; //no more lists! 
} @@ -622,7 +615,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { } // TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe? if (interruptsEnabled_ && trigger) { - if (__GeTriggerInterrupt(currentList->id, currentList->pc)) + if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) gpuState = GPUSTATE_INTERRUPT; } } @@ -631,7 +624,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { switch (currentList->signal) { case PSP_GE_SIGNAL_HANDLER_PAUSE: if (interruptsEnabled_) { - if (__GeTriggerInterrupt(currentList->id, currentList->pc)) + if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) gpuState = GPUSTATE_INTERRUPT; } break; @@ -645,9 +638,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { currentList->subIntrToken = prev & 0xFFFF; currentList->state = PSP_GE_DL_STATE_COMPLETED; gpuState = GPUSTATE_DONE; - if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc)) { - currentList->shouldWait = false; - __KernelTriggerWait(WAITTYPE_GELISTSYNC, currentList->id, 0, "GeListSync", true); + if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) { + currentList->waitTicks = startingTicks + cyclesExecuted; + __GeTriggerSync(WAITTYPE_GELISTSYNC, currentList->id, currentList->waitTicks); } break; } @@ -682,7 +675,7 @@ void GPUCommon::DoState(PointerWrap &p) { p.Do(prev); p.Do(gpuState); p.Do(isbreak); - p.Do(drawComplete); + p.Do(drawCompleteTicks); p.DoMarker("GPUCommon"); } @@ -698,9 +691,22 @@ void GPUCommon::InterruptEnd(int listid) DisplayList &dl = dls[listid]; // TODO: Unless the signal handler could change it? if (dl.state == PSP_GE_DL_STATE_COMPLETED) { - dl.shouldWait = false; + dl.waitTicks = 0; __KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync", true); } ProcessDLQueue(); } + +// TODO: Maybe cleaner to keep this in GE and trigger the clear directly? 
+void GPUCommon::SyncEnd(WaitType waitType, int listid, bool wokeThreads) +{ + if (waitType == WAITTYPE_GEDRAWSYNC && wokeThreads) + { + for (int i = 0; i < DisplayListMaxCount; ++i) { + if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) { + dls[i].state = PSP_GE_DL_STATE_NONE; + } + } + } +} diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index b392cff5bd..53174216f3 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -10,6 +10,7 @@ public: virtual void InterruptStart(int listid); virtual void InterruptEnd(int listid); + virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads); virtual void EnableInterrupts(bool enable) { interruptsEnabled_ = enable; } @@ -43,7 +44,7 @@ protected: u32 prev; GPUState gpuState; bool isbreak; - bool drawComplete; + u64 drawCompleteTicks; u64 startingTicks; u32 cycleLastPC; diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 265d051454..8727043ad7 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -21,6 +21,7 @@ #include "GPUState.h" #include +enum WaitType; class PointerWrap; enum DisplayListStatus @@ -121,7 +122,7 @@ struct DisplayList u32 stack[32]; int stackptr; bool interrupted; - bool shouldWait; + u64 waitTicks; }; class GPUInterface @@ -147,6 +148,7 @@ public: virtual void InterruptStart(int listid) = 0; virtual void InterruptEnd(int listid) = 0; + virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads) = 0; virtual void PreExecuteOp(u32 op, u32 diff) = 0; virtual void ExecuteOp(u32 op, u32 diff) = 0; From 5d017829ad3183e9fb8d684b1ba7c4bfba672e38 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 7 Apr 2013 16:47:29 -0700 Subject: [PATCH 2/5] Make sure lists aren't reused before they complete. 
--- Core/HLE/sceGe.cpp | 2 +- GPU/GPUCommon.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 17ef2aaf71..eb6ccd2292 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -158,7 +158,7 @@ void __GeExecuteInterrupt(u64 userdata, int cyclesLate) intrdata.listid = listid; intrdata.pc = pc; ge_pending_cb.push_back(intrdata); - __TriggerInterrupt(PSP_INTR_HLE, PSP_GE_INTR, PSP_INTR_SUB_NONE); + __TriggerInterrupt(PSP_INTR_IMMEDIATE, PSP_GE_INTR, PSP_INTR_SUB_NONE); } void __GeInit() diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index bd6f3ef444..eb26cdd7bb 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -139,6 +139,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) oldCompatibility = false; } + u64 currentTicks = CoreTiming::GetTicks(); for (int i = 0; i < DisplayListMaxCount; ++i) { if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) { @@ -157,7 +158,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) id = i; break; } - if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED) + if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED && dls[i].waitTicks < currentTicks) { id = i; } @@ -690,7 +691,7 @@ void GPUCommon::InterruptEnd(int listid) DisplayList &dl = dls[listid]; // TODO: Unless the signal handler could change it? - if (dl.state == PSP_GE_DL_STATE_COMPLETED) { + if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) { dl.waitTicks = 0; __KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync", true); } From 210c7b1639e2467322eb9039ea8823af2c75e4d6 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 7 Apr 2013 16:55:48 -0700 Subject: [PATCH 3/5] Slow down the GPU clock estimate. This makes Fat Princess for example much faster. 
--- GPU/GLES/DisplayListInterpreter.cpp | 3 ++- GPU/GPUCommon.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 4332d91d61..a66bd655f1 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -334,7 +334,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { break; } - cyclesExecuted += 10 * count; + // Rough estimate, not sure what's correct. + cyclesExecuted += 80 * count; // TODO: Split this so that we can collect sequences of primitives, can greatly speed things up // on platforms where draw calls are expensive like mobile and D3D diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index eb26cdd7bb..ecfa801cf3 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -426,7 +426,8 @@ bool GPUCommon::InterpretList(DisplayList &list) inline void GPUCommon::UpdateCycles(u32 pc, u32 newPC) { - cyclesExecuted += (pc - cycleLastPC) / 4; + // Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction. + cyclesExecuted += 2 * (pc - cycleLastPC) / 4; cycleLastPC = newPC == 0 ? pc : newPC; } From 6190918158be5e05556fc7fddad72e851b32d264 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 7 Apr 2013 17:37:24 -0700 Subject: [PATCH 4/5] Don't allow two drawsyncs to be in play at once. 
--- Core/CoreTiming.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++++ Core/CoreTiming.h | 1 + Core/HLE/sceGe.cpp | 2 ++ 3 files changed, 49 insertions(+) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index d442f0bcc9..d6625ee736 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -315,6 +315,52 @@ s64 UnscheduleEvent(int event_type, u64 userdata) return result; } +s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata) +{ + s64 result = 0; + std::lock_guard lk(externalEventSection); + if (!tsFirst) + return result; + while(tsFirst) + { + if (tsFirst->type == event_type && tsFirst->userdata == userdata) + { + result = tsFirst->time - globalTimer; + + Event *next = tsFirst->next; + FreeTsEvent(tsFirst); + tsFirst = next; + } + else + { + break; + } + } + if (!tsFirst) + return result; + + Event *prev = tsFirst; + Event *ptr = prev->next; + while (ptr) + { + if (ptr->type == event_type && ptr->userdata == userdata) + { + result = ptr->time - globalTimer; + + prev->next = ptr->next; + FreeTsEvent(ptr); + ptr = prev->next; + } + else + { + prev = ptr; + ptr = ptr->next; + } + } + + return result; +} + // Warning: not included in save state. 
void RegisterAdvanceCallback(void (*callback)(int cyclesExecuted)) { diff --git a/Core/CoreTiming.h b/Core/CoreTiming.h index 5f608df5af..d4bdd12aca 100644 --- a/Core/CoreTiming.h +++ b/Core/CoreTiming.h @@ -94,6 +94,7 @@ namespace CoreTiming void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata=0); void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata=0); s64 UnscheduleEvent(int event_type, u64 userdata); + s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata); void RemoveEvent(int event_type); void RemoveThreadsafeEvent(int event_type); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index eb6ccd2292..1e77a249a1 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -195,6 +195,8 @@ void __GeShutdown() bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks) { u64 userdata = (u64)id << 32 | (u64) waitType; + if (waitType == WAITTYPE_GEDRAWSYNC) + CoreTiming::UnscheduleThreadsafeEvent(geSyncEvent, userdata); CoreTiming::ScheduleEvent_Threadsafe(atTicks - CoreTiming::GetTicks(), geSyncEvent, userdata); return true; } From a8e8b096e62e3c2e2af3b235c04918a25be996d2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 7 Apr 2013 17:52:57 -0700 Subject: [PATCH 5/5] Use regular events for GPU for now, quicker. There's too much latency in threadsafe events, causing tests to fail. Might break games too. I guess they need to execute more often... 
--- Core/HLE/sceGe.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 1e77a249a1..a044b6f0e9 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -195,9 +195,14 @@ void __GeShutdown() bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks) { u64 userdata = (u64)id << 32 | (u64) waitType; + s64 future = atTicks - CoreTiming::GetTicks(); if (waitType == WAITTYPE_GEDRAWSYNC) - CoreTiming::UnscheduleThreadsafeEvent(geSyncEvent, userdata); - CoreTiming::ScheduleEvent_Threadsafe(atTicks - CoreTiming::GetTicks(), geSyncEvent, userdata); + { + s64 left = CoreTiming::UnscheduleEvent(geSyncEvent, userdata); + if (left > future) + future = left; + } + CoreTiming::ScheduleEvent(future, geSyncEvent, userdata); return true; } @@ -205,7 +210,7 @@ bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks) bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks) { u64 userdata = (u64)listid << 32 | (u64) pc; - CoreTiming::ScheduleEvent_Threadsafe(atTicks - CoreTiming::GetTicks(), geInterruptEvent, userdata); + CoreTiming::ScheduleEvent(atTicks - CoreTiming::GetTicks(), geInterruptEvent, userdata); return true; }