mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
ParallelLoop: A bit smarter straggler handling.
This commit is contained in:
parent
3be5c7bd9a
commit
5b64a41a97
4 changed files with 47 additions and 21 deletions
|
@ -35,25 +35,52 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
|||
|
||||
if (range <= numTasks) {
|
||||
// Just assign one task per thread, as many as we have.
|
||||
WaitableCounter *counter = new WaitableCounter(range);
|
||||
WaitableCounter *waitableCounter = new WaitableCounter(range);
|
||||
for (int i = 0; i < range; i++) {
|
||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, i, i + 1), TaskType::CPU_COMPUTE);
|
||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, i, i + 1), TaskType::CPU_COMPUTE);
|
||||
}
|
||||
return counter;
|
||||
return waitableCounter;
|
||||
} else {
|
||||
WaitableCounter *counter = new WaitableCounter(numTasks);
|
||||
// Split the range between threads.
|
||||
double dx = (double)range / (double)numTasks;
|
||||
double d = 0.0;
|
||||
int lastEnd = 0;
|
||||
// Split the range between threads. Allow for some fractional bits.
|
||||
const int fractionalBits = 8;
|
||||
|
||||
int64_t totalFrac = (int64_t)range << fractionalBits;
|
||||
int64_t delta = totalFrac / numTasks;
|
||||
|
||||
delta = std::max(delta, (int64_t)minSize << fractionalBits);
|
||||
|
||||
// Now we can compute the actual number of tasks.
|
||||
// Remember that stragglers are done on the current thread
|
||||
// so we don't round up.
|
||||
numTasks = (int)(totalFrac / delta);
|
||||
|
||||
WaitableCounter *waitableCounter = new WaitableCounter(numTasks);
|
||||
int64_t counter = (int64_t)lower << fractionalBits;
|
||||
|
||||
// Split up tasks as equitably as possible.
|
||||
for (int i = 0; i < numTasks; i++) {
|
||||
int start = lastEnd;
|
||||
d += dx;
|
||||
int end = i == numTasks - 1 ? range : (int)d;
|
||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, start, end), TaskType::CPU_COMPUTE);
|
||||
lastEnd = end;
|
||||
int start = (int)(counter >> fractionalBits);
|
||||
int end = (int)((counter + delta) >> fractionalBits);
|
||||
if (end > upper) {
|
||||
// Let's do the stragglers on the current thread.
|
||||
break;
|
||||
}
|
||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE);
|
||||
counter += delta;
|
||||
if ((counter >> fractionalBits) > upper) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return counter;
|
||||
|
||||
// Run stragglers on the calling thread directly.
|
||||
// We might add a flag later to avoid this for some cases.
|
||||
int stragglerStart = (int)(counter >> fractionalBits);
|
||||
int stragglerEnd = upper;
|
||||
if (stragglerStart < stragglerEnd) {
|
||||
// printf("doing stragglers: %d-%d\n", stragglerStart, stragglerEnd);
|
||||
loop(stragglerStart, stragglerEnd);
|
||||
}
|
||||
return waitableCounter;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -32,11 +32,10 @@ public:
|
|||
std::condition_variable cond_;
|
||||
};
|
||||
|
||||
// Note that upper bounds are non-inclusive.
|
||||
// This one never executes the remainder on the calling thread.
|
||||
// Note that upper bounds are non-inclusive: range is [lower, upper)
|
||||
WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::function<void(int, int)> &loop, int lower, int upper, int minSize);
|
||||
|
||||
// This one optimizes by running the remainder on the calling thread.
|
||||
// Note that upper bounds are non-inclusive: range is [lower, upper)
|
||||
void ParallelRangeLoop(ThreadManager *threadMan, const std::function<void(int, int)> &loop, int lower, int upper, int minSize);
|
||||
|
||||
// Common utilities for large (!) memory copies.
|
||||
|
|
|
@ -627,8 +627,8 @@ handleELF:
|
|||
}
|
||||
|
||||
done:
|
||||
info_->pending = false;
|
||||
info_->working = false;
|
||||
info_->pending.store(false);
|
||||
info_->working.store(false);
|
||||
info_->readyEvent.Notify();
|
||||
// INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str());
|
||||
}
|
||||
|
|
|
@ -46,10 +46,10 @@ bool TestParallelLoop(ThreadManager *threadMan) {
|
|||
// Now it's done.
|
||||
|
||||
// Try a loop with stragglers.
|
||||
printf("blocking test #1\n");
|
||||
printf("blocking test #1 [0-65)\n");
|
||||
ParallelRangeLoop(threadMan, rangeFunc, 0, 65, 1);
|
||||
// Try a loop with a relatively large minimum size.
|
||||
printf("blocking test #2\n");
|
||||
printf("blocking test #2 [0-100)\n");
|
||||
ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40);
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue