diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index d442f0bcc9..d6625ee736 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -315,6 +315,52 @@ s64 UnscheduleEvent(int event_type, u64 userdata) return result; } +s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata) +{ + s64 result = 0; + std::lock_guard lk(externalEventSection); + if (!tsFirst) + return result; + while(tsFirst) + { + if (tsFirst->type == event_type && tsFirst->userdata == userdata) + { + result = tsFirst->time - globalTimer; + + Event *next = tsFirst->next; + FreeTsEvent(tsFirst); + tsFirst = next; + } + else + { + break; + } + } + if (!tsFirst) + return result; + + Event *prev = tsFirst; + Event *ptr = prev->next; + while (ptr) + { + if (ptr->type == event_type && ptr->userdata == userdata) + { + result = ptr->time - globalTimer; + + prev->next = ptr->next; + FreeTsEvent(ptr); + ptr = prev->next; + } + else + { + prev = ptr; + ptr = ptr->next; + } + } + + return result; +} + // Warning: not included in save state. void RegisterAdvanceCallback(void (*callback)(int cyclesExecuted)) { diff --git a/Core/CoreTiming.h b/Core/CoreTiming.h index 5f608df5af..d4bdd12aca 100644 --- a/Core/CoreTiming.h +++ b/Core/CoreTiming.h @@ -94,6 +94,7 @@ namespace CoreTiming void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata=0); void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata=0); s64 UnscheduleEvent(int event_type, u64 userdata); + s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata); void RemoveEvent(int event_type); void RemoveThreadsafeEvent(int event_type); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index f56692b1ad..a044b6f0e9 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -19,6 +19,7 @@ #include "../MIPS/MIPS.h" #include "../System.h" #include "../CoreParameter.h" +#include "../CoreTiming.h" #include "../Reporting.h" #include "sceGe.h" #include "sceKernelMemory.h" @@ -37,6 +38,8 @@ struct GeInterruptData }; static std::list ge_pending_cb; +static int geSyncEvent; +static int geInterruptEvent; class GeIntrHandler : public IntrHandler { @@ -100,7 +103,8 @@ public: ge_pending_cb.pop_front(); gpu->InterruptEnd(intrdata.listid); - WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid); + if (subintr >= 0) + WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid); return false; } @@ -137,11 +141,34 @@ public: } }; +void __GeExecuteSync(u64 userdata, int cyclesLate) +{ + int listid = userdata >> 32; + WaitType waitType = (WaitType) (userdata & 0xFFFFFFFF); + bool wokeThreads = __KernelTriggerWait(waitType, listid, 0, "GeSync", true); + gpu->SyncEnd(waitType, listid, wokeThreads); +} + +void __GeExecuteInterrupt(u64 userdata, int cyclesLate) +{ + int listid = userdata >> 32; + u32 pc = userdata & 0xFFFFFFFF; + + GeInterruptData intrdata; + intrdata.listid = listid; + intrdata.pc = pc; + ge_pending_cb.push_back(intrdata); + __TriggerInterrupt(PSP_INTR_IMMEDIATE, PSP_GE_INTR, PSP_INTR_SUB_NONE); +} + void __GeInit() { memset(&ge_used_callbacks, 0, sizeof(ge_used_callbacks)); ge_pending_cb.clear(); __RegisterIntrHandler(PSP_GE_INTR, new GeIntrHandler()); + + geSyncEvent = CoreTiming::RegisterEvent("GeSyncEvent", &__GeExecuteSync); + geInterruptEvent = CoreTiming::RegisterEvent("GeInterruptEvent", &__GeExecuteInterrupt); } void __GeDoState(PointerWrap &p) @@ -149,6 +176,12 @@ void __GeDoState(PointerWrap &p) p.DoArray(ge_callback_data, ARRAY_SIZE(ge_callback_data)); p.DoArray(ge_used_callbacks, ARRAY_SIZE(ge_used_callbacks)); p.Do(ge_pending_cb); + + p.Do(geSyncEvent); + CoreTiming::RestoreRegisterEvent(geSyncEvent, "GeSyncEvent", &__GeExecuteSync); + p.Do(geInterruptEvent); + CoreTiming::RestoreRegisterEvent(geInterruptEvent, "GeInterruptEvent", &__GeExecuteInterrupt); + // Everything else is done in sceDisplay. p.DoMarker("sceGe"); } @@ -158,19 +191,26 @@ void __GeShutdown() } -bool __GeTriggerInterrupt(int listid, u32 pc) +// Warning: may be called from the GPU thread. +bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks) { - // ClaDun X2 does not expect sceGeListEnqueue to reschedule (which it does not on the PSP.) - // Once PPSSPP's GPU uses cycles, we can remove this check. - DisplayList* dl = gpu->getList(listid); - if (dl != NULL && dl->subIntrBase < 0) - return false; + u64 userdata = (u64)id << 32 | (u64) waitType; + s64 future = atTicks - CoreTiming::GetTicks(); + if (waitType == WAITTYPE_GEDRAWSYNC) + { + s64 left = CoreTiming::UnscheduleEvent(geSyncEvent, userdata); + if (left > future) + future = left; + } + CoreTiming::ScheduleEvent(future, geSyncEvent, userdata); + return true; +} - GeInterruptData intrdata; - intrdata.listid = listid; - intrdata.pc = pc; - ge_pending_cb.push_back(intrdata); - __TriggerInterrupt(PSP_INTR_HLE, PSP_GE_INTR, PSP_INTR_SUB_NONE); +// Warning: may be called from the GPU thread. +bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks) +{ + u64 userdata = (u64)listid << 32 | (u64) pc; + CoreTiming::ScheduleEvent(atTicks - CoreTiming::GetTicks(), geInterruptEvent, userdata); return true; } diff --git a/Core/HLE/sceGe.h b/Core/HLE/sceGe.h index 7c82a98d5f..9659ed0b93 100644 --- a/Core/HLE/sceGe.h +++ b/Core/HLE/sceGe.h @@ -17,6 +17,8 @@ #pragma once +#include "Core/HLE/sceKernelThread.h" + #define SCE_GE_LIST_COMPLETED 0 #define SCE_GE_LIST_QUEUED 1 #define SCE_GE_LIST_DRAWING 2 @@ -39,7 +41,8 @@ void Register_sceGe_user(); void __GeInit(); void __GeDoState(PointerWrap &p); void __GeShutdown(); -bool __GeTriggerInterrupt(int listid, u32 pc); +bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks); +bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks); bool __GeHasPendingInterrupt(); diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 4332d91d61..a66bd655f1 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -334,7 +334,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { break; } - cyclesExecuted += 10 * count; + // Rough estimate, not sure what's correct. + cyclesExecuted += 80 * count; // TODO: Split this so that we can collect sequences of primitives, can greatly speed things up // on platforms where draw calls are expensive like mobile and D3D diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 46a9a91a60..ecfa801cf3 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -16,14 +16,14 @@ GPUCommon::GPUCommon() : currentList(NULL), isbreak(false), - drawComplete(true), + drawCompleteTicks(0), dumpNextFrame_(false), dumpThisFrame_(false), interruptsEnabled_(true) { for (int i = 0; i < DisplayListMaxCount; ++i) { dls[i].state = PSP_GE_DL_STATE_NONE; - dls[i].shouldWait = false; + dls[i].waitTicks = 0; } } @@ -47,7 +47,7 @@ u32 GPUCommon::DrawSync(int mode) { if (mode == 0) { // TODO: What if dispatch / interrupts disabled? - if (!drawComplete) { + if (drawCompleteTicks > CoreTiming::GetTicks()) { __KernelWaitCurThread(WAITTYPE_GEDRAWSYNC, 1, 0, 0, false, "GeDrawSync"); } else { for (int i = 0; i < DisplayListMaxCount; ++i) { @@ -116,7 +116,7 @@ int GPUCommon::ListSync(int listid, int mode) } } - if (dl.shouldWait) { + if (dl.waitTicks > CoreTiming::GetTicks()) { __KernelWaitCurThread(WAITTYPE_GELISTSYNC, listid, 0, 0, false, "GeListSync"); } return PSP_GE_LIST_COMPLETED; @@ -139,6 +139,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) oldCompatibility = false; } + u64 currentTicks = CoreTiming::GetTicks(); for (int i = 0; i < DisplayListMaxCount; ++i) { if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) { @@ -157,7 +158,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) id = i; break; } - if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED) + if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED && dls[i].waitTicks < currentTicks) { id = i; } @@ -181,7 +182,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) dl.stackptr = 0; dl.signal = PSP_GE_SIGNAL_NONE; dl.interrupted = false; - dl.shouldWait = true; + dl.waitTicks = (u64)-1; if (head) { if (currentList) { @@ -202,7 +203,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) currentList = &dl; dlQueue.push_front(id); - drawComplete = false; + drawCompleteTicks = (u64)-1; // TODO save context when starting the list if param is set ProcessDLQueue(); @@ -226,7 +227,7 @@ u32 GPUCommon::DequeueList(int listid) else dlQueue.remove(listid); - dls[listid].shouldWait = false; + dls[listid].waitTicks = 0; __KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync"); CheckDrawSync(); @@ -265,7 +266,7 @@ u32 GPUCommon::Continue() // TODO Restore BASE // We have a list now, so it's not complete. - drawComplete = false; + drawCompleteTicks = (u64)-1; } else currentList->state = PSP_GE_DL_STATE_QUEUED; @@ -425,7 +426,8 @@ bool GPUCommon::InterpretList(DisplayList &list) inline void GPUCommon::UpdateCycles(u32 pc, u32 newPC) { - cyclesExecuted += (pc - cycleLastPC) / 4; + // Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction. + cyclesExecuted += 2 * (pc - cycleLastPC) / 4; cycleLastPC = newPC == 0 ? pc : newPC; } @@ -453,15 +455,8 @@ bool GPUCommon::ProcessDLQueue() } currentList = NULL; - drawComplete = true; - if (__KernelTriggerWait(WAITTYPE_GEDRAWSYNC, 1, 0, "GeDrawSync")) - { - for (int i = 0; i < DisplayListMaxCount; ++i) { - if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) { - dls[i].state = PSP_GE_DL_STATE_NONE; - } - } - } + drawCompleteTicks = startingTicks + cyclesExecuted; + __GeTriggerSync(WAITTYPE_GEDRAWSYNC, 1, drawCompleteTicks); return true; //no more lists! } @@ -622,7 +617,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { } // TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe? if (interruptsEnabled_ && trigger) { - if (__GeTriggerInterrupt(currentList->id, currentList->pc)) + if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) gpuState = GPUSTATE_INTERRUPT; } } @@ -631,7 +626,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { switch (currentList->signal) { case PSP_GE_SIGNAL_HANDLER_PAUSE: if (interruptsEnabled_) { - if (__GeTriggerInterrupt(currentList->id, currentList->pc)) + if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) gpuState = GPUSTATE_INTERRUPT; } break; @@ -645,9 +640,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { currentList->subIntrToken = prev & 0xFFFF; currentList->state = PSP_GE_DL_STATE_COMPLETED; gpuState = GPUSTATE_DONE; - if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc)) { - currentList->shouldWait = false; - __KernelTriggerWait(WAITTYPE_GELISTSYNC, currentList->id, 0, "GeListSync", true); + if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) { + currentList->waitTicks = startingTicks + cyclesExecuted; + __GeTriggerSync(WAITTYPE_GELISTSYNC, currentList->id, currentList->waitTicks); } break; } @@ -682,7 +677,7 @@ void GPUCommon::DoState(PointerWrap &p) { p.Do(prev); p.Do(gpuState); p.Do(isbreak); - p.Do(drawComplete); + p.Do(drawCompleteTicks); p.DoMarker("GPUCommon"); } @@ -697,10 +692,23 @@ void GPUCommon::InterruptEnd(int listid) DisplayList &dl = dls[listid]; // TODO: Unless the signal handler could change it? - if (dl.state == PSP_GE_DL_STATE_COMPLETED) { - dl.shouldWait = false; + if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) { + dl.waitTicks = 0; __KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync", true); } ProcessDLQueue(); } + +// TODO: Maybe cleaner to keep this in GE and trigger the clear directly? +void GPUCommon::SyncEnd(WaitType waitType, int listid, bool wokeThreads) +{ + if (waitType == WAITTYPE_GEDRAWSYNC && wokeThreads) + { + for (int i = 0; i < DisplayListMaxCount; ++i) { + if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) { + dls[i].state = PSP_GE_DL_STATE_NONE; + } + } + } +} diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index b392cff5bd..53174216f3 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -10,6 +10,7 @@ public: virtual void InterruptStart(int listid); virtual void InterruptEnd(int listid); + virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads); virtual void EnableInterrupts(bool enable) { interruptsEnabled_ = enable; } @@ -43,7 +44,7 @@ protected: u32 prev; GPUState gpuState; bool isbreak; - bool drawComplete; + u64 drawCompleteTicks; u64 startingTicks; u32 cycleLastPC; diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 265d051454..8727043ad7 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -21,6 +21,7 @@ #include "GPUState.h" #include +enum WaitType; class PointerWrap; enum DisplayListStatus @@ -121,7 +122,7 @@ struct DisplayList u32 stack[32]; int stackptr; bool interrupted; - bool shouldWait; + u64 waitTicks; }; class GPUInterface @@ -147,6 +148,7 @@ public: virtual void InterruptStart(int listid) = 0; virtual void InterruptEnd(int listid) = 0; + virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads) = 0; virtual void PreExecuteOp(u32 op, u32 diff) = 0; virtual void ExecuteOp(u32 op, u32 diff) = 0;