From 5b64a41a973599058c3c30bf02e0219cc8ebc573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 12 Jun 2021 21:57:16 +0200 Subject: [PATCH] ParallelLoop: A bit smarter straggler handling. --- Common/Thread/ParallelLoop.cpp | 55 +++++++++++++++++++++++++--------- Common/Thread/ParallelLoop.h | 5 ++-- UI/GameInfoCache.cpp | 4 +-- unittest/TestThreadManager.cpp | 4 +-- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/Common/Thread/ParallelLoop.cpp b/Common/Thread/ParallelLoop.cpp index 8e446dcd37..20c4f2fe3a 100644 --- a/Common/Thread/ParallelLoop.cpp +++ b/Common/Thread/ParallelLoop.cpp @@ -35,25 +35,52 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std:: if (range <= numTasks) { // Just assign one task per thread, as many as we have. - WaitableCounter *counter = new WaitableCounter(range); + WaitableCounter *waitableCounter = new WaitableCounter(range); for (int i = 0; i < range; i++) { - threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, i, i + 1), TaskType::CPU_COMPUTE); + threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, i, i + 1), TaskType::CPU_COMPUTE); } - return counter; + return waitableCounter; } else { - WaitableCounter *counter = new WaitableCounter(numTasks); - // Split the range between threads. - double dx = (double)range / (double)numTasks; - double d = 0.0; - int lastEnd = 0; + // Split the range between threads. Allow for some fractional bits. + const int fractionalBits = 8; + + int64_t totalFrac = (int64_t)range << fractionalBits; + int64_t delta = totalFrac / numTasks; + + delta = std::max(delta, (int64_t)minSize << fractionalBits); + + // Now we can compute the actual number of tasks. + // Remember that stragglers are done on the current thread + // so we don't round up. 
+ numTasks = (int)(totalFrac / delta); + + WaitableCounter *waitableCounter = new WaitableCounter(numTasks); + int64_t counter = (int64_t)lower << fractionalBits; + + // Split up tasks as equitably as possible. for (int i = 0; i < numTasks; i++) { - int start = lastEnd; - d += dx; - int end = i == numTasks - 1 ? range : (int)d; - threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, start, end), TaskType::CPU_COMPUTE); - lastEnd = end; + int start = (int)(counter >> fractionalBits); + int end = (int)((counter + delta) >> fractionalBits); + if (end > upper) { + // Let's do the stragglers on the current thread. + break; + } + threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE); + counter += delta; + if ((counter >> fractionalBits) > upper) { + break; + } } - return counter; + + // Run stragglers on the calling thread directly. + // We might add a flag later to avoid this for some cases. + int stragglerStart = (int)(counter >> fractionalBits); + int stragglerEnd = upper; + if (stragglerStart < stragglerEnd) { + // printf("doing stragglers: %d-%d\n", stragglerStart, stragglerEnd); + loop(stragglerStart, stragglerEnd); + } + return waitableCounter; } } diff --git a/Common/Thread/ParallelLoop.h b/Common/Thread/ParallelLoop.h index 60e8f30694..5a0bc22578 100644 --- a/Common/Thread/ParallelLoop.h +++ b/Common/Thread/ParallelLoop.h @@ -32,11 +32,10 @@ public: std::condition_variable cond_; }; -// Note that upper bounds are non-inclusive. -// This one never executes the remainer on the calling thread. +// Note that upper bounds are non-inclusive: range is [lower, upper) WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::function &loop, int lower, int upper, int minSize); -// This one optimizes by running the remainder on the calling thread. 
+// Note that upper bounds are non-inclusive: range is [lower, upper) void ParallelRangeLoop(ThreadManager *threadMan, const std::function &loop, int lower, int upper, int minSize); // Common utilities for large (!) memory copies. diff --git a/UI/GameInfoCache.cpp b/UI/GameInfoCache.cpp index e40fde9b8f..37fe76ddd9 100644 --- a/UI/GameInfoCache.cpp +++ b/UI/GameInfoCache.cpp @@ -627,8 +627,8 @@ handleELF: } done: - info_->pending = false; - info_->working = false; + info_->pending.store(false); + info_->working.store(false); info_->readyEvent.Notify(); // INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str()); } diff --git a/unittest/TestThreadManager.cpp b/unittest/TestThreadManager.cpp index 8625d96eb2..cde37e7df3 100644 --- a/unittest/TestThreadManager.cpp +++ b/unittest/TestThreadManager.cpp @@ -46,10 +46,10 @@ bool TestParallelLoop(ThreadManager *threadMan) { // Now it's done. // Try a loop with stragglers. - printf("blocking test #1\n"); + printf("blocking test #1 [0-65)\n"); ParallelRangeLoop(threadMan, rangeFunc, 0, 65, 1); // Try a loop with a relatively large minimum size. - printf("blocking test #2\n"); + printf("blocking test #2 [0-100)\n"); ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40); return true; }