mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Adjusted Texture Scaler to use global threadpool
This commit is contained in:
parent
0ef103525f
commit
95e6a5c4ea
2 changed files with 10 additions and 109 deletions
|
@ -21,83 +21,20 @@
|
|||
#include "Common/Log.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/ThreadPool.h"
|
||||
#include "ext/xbrz/xbrz.h"
|
||||
|
||||
// Creates the worker thread and returns only once that thread has set the
// started flag at the top of WorkFunc().
WorkerThread::WorkerThread()
	: active(true), started(false) {
	// The new thread runs WorkFunc() on this object.
	thread = new std::thread([this] { this->WorkFunc(); });
	// doneMutex is taken here and later handed to done.wait() in
	// WaitForCompletion(); it is not released in this constructor.
	doneMutex.lock();
	// Spin until the worker has flagged that it is running.
	while (started == false) {
	}
}
|
||||
|
||||
// Asks the worker loop to stop, waits for the thread to exit, then frees it.
WorkerThread::~WorkerThread() {
	{
		// Clear the active flag and wake the worker while holding its mutex.
		mutex.lock();
		active = false;
		signal.notify_one();
		mutex.unlock();
	}

	// Wait for the thread to finish before releasing the std::thread object.
	std::thread *worker = thread;
	worker->join();
	delete worker;
}
|
||||
|
||||
// Stores `work` as the pending work item and signals the worker thread.
// Only one item is held at a time: a new call overwrites work_.
void WorkerThread::Process(const std::function<void()>& work) {
	mutex.lock();
	{
		// Both the assignment and the wake-up happen under the mutex.
		work_ = work;
		signal.notify_one();
	}
	mutex.unlock();
}
|
||||
|
||||
// Blocks the caller on the `done` condition, which WorkFunc() notifies after
// each wake-up. Uses doneMutex, which the constructor locked.
void WorkerThread::WaitForCompletion()
{
	done.wait(doneMutex);
}
|
||||
|
||||
void WorkerThread::WorkFunc() {
|
||||
mutex.lock();
|
||||
started = true;
|
||||
while(active) {
|
||||
signal.wait(mutex);
|
||||
if(active) work_();
|
||||
doneMutex.lock();
|
||||
done.notify_one();
|
||||
doneMutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sets the worker count to four and marks the workers as not yet started;
// the workers themselves are created later by StartWorkers().
TextureScaler::TextureScaler()
	: numThreads(4),
	  workersStarted(false)
{
}
|
||||
|
||||
void TextureScaler::StartWorkers() {
|
||||
if(!workersStarted) {
|
||||
for(int i=0; i<numThreads; ++i) {
|
||||
workers.push_back(std::make_shared<WorkerThread>());
|
||||
}
|
||||
workersStarted = true;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureScaler::ParallelLoop(std::function<void(int,int)> loop, int lower, int upper) {
|
||||
StartWorkers();
|
||||
int range = upper-lower;
|
||||
if(range >= numThreads*2) { // don't parallelize tiny loops
|
||||
// could do slightly better load balancing for the generic case,
|
||||
// but doesn't matter since all our loops are power of 2
|
||||
int chunk = range/numThreads;
|
||||
for(int s=lower, i=0; i<numThreads; s+=chunk, ++i) {
|
||||
workers[i]->Process(std::bind(loop, s, std::min(s+chunk,upper)));
|
||||
}
|
||||
for(int i=0; i<numThreads; ++i) {
|
||||
workers[i]->WaitForCompletion();
|
||||
}
|
||||
} else {
|
||||
loop(lower, upper);
|
||||
}
|
||||
}
|
||||
|
||||
// Report the time and throughput for each larger scaling operation in the log
|
||||
//#define SCALING_MEASURE_TIME
|
||||
|
||||
#ifdef SCALING_MEASURE_TIME
|
||||
#include "native/base/timeutil.h"
|
||||
#endif
|
||||
|
||||
|
||||
// Nothing to set up explicitly; members are default-initialized.
TextureScaler::TextureScaler()
{
}
|
||||
|
||||
void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
||||
if(g_Config.iXBRZTexScalingLevel > 1) {
|
||||
#ifdef SCALING_MEASURE_TIME
|
||||
|
@ -119,7 +56,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
|||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4:
|
||||
ParallelLoop([&](int l, int u){
|
||||
GlobalThreadPool::Loop([&](int l, int u){
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
|
@ -134,7 +71,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
|||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_6_5:
|
||||
ParallelLoop([&](int l, int u){
|
||||
GlobalThreadPool::Loop([&](int l, int u){
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
|
@ -148,7 +85,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
|||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1:
|
||||
ParallelLoop([&](int l, int u) {
|
||||
GlobalThreadPool::Loop([&](int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
|
@ -168,7 +105,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
|||
|
||||
// scale
|
||||
xbrz::ScalerCfg cfg;
|
||||
ParallelLoop([&](int l, int u) {
|
||||
GlobalThreadPool::Loop([&](int l, int u) {
|
||||
xbrz::scale(factor, xbrzInputBuf, xbrzBuf, width, height, cfg, l, u);
|
||||
}, 0, height);
|
||||
|
||||
|
|
|
@ -24,34 +24,6 @@
|
|||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include "native/thread/thread.h"
|
||||
#include "base/mutex.h"
|
||||
|
||||
// This is the simplest possible worker implementation I can think of
|
||||
// but entirely sufficient for the given purpose.
|
||||
// Only handles a single item of work at a time.
|
||||
class WorkerThread {
|
||||
public:
|
||||
WorkerThread();
|
||||
~WorkerThread();
|
||||
|
||||
// submit a new work item
|
||||
void Process(const std::function<void()>& work);
|
||||
// wait for a submitted work item to be completed
|
||||
void WaitForCompletion();
|
||||
|
||||
private:
|
||||
std::thread *thread; // the worker thread
|
||||
condition_variable signal; // used to signal new work
|
||||
condition_variable done; // used to signal work completion
|
||||
recursive_mutex mutex, doneMutex; // associated with each respective condition variable
|
||||
volatile bool active, started;
|
||||
std::function<void()> work_; // the work to be done by this thread
|
||||
|
||||
void WorkFunc();
|
||||
|
||||
WorkerThread(const WorkerThread& other) { } // prevent copies
|
||||
};
|
||||
|
||||
class TextureScaler {
|
||||
public:
|
||||
|
@ -60,14 +32,6 @@ public:
|
|||
void Scale(u32* &data, GLenum &dstfmt, int &width, int &height);
|
||||
|
||||
private:
|
||||
const int numThreads;
|
||||
std::vector<std::shared_ptr<WorkerThread>> workers;
|
||||
|
||||
bool workersStarted;
|
||||
void StartWorkers();
|
||||
|
||||
void ParallelLoop(std::function<void(int,int)> loop, int lower, int upper);
|
||||
|
||||
SimpleBuf<u32> bufInput;
|
||||
SimpleBuf<u32> bufOutput;
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue