Adjusted Texture Scaler to use global threadpool

2025-04-02 11:01:50 -04:00 · 2013-05-01 14:08:33 +02:00 · 2013-05-01 14:08:33 +02:00 · 95e6a5c4ea
commit 95e6a5c4ea
parent 0ef103525f
2 changed files with 10 additions and 109 deletions
--- a/GPU/GLES/TextureScaler.cpp
+++ b/GPU/GLES/TextureScaler.cpp
@@ -21,83 +21,20 @@
 #include "Common/Log.h"
 #include "Common/MsgHandler.h"
 #include "Common/CommonFuncs.h"
+#include "Common/ThreadPool.h"
 #include "ext/xbrz/xbrz.h"

-WorkerThread::WorkerThread() : active(true), started(false) {
-	thread = new std::thread([&]() { WorkFunc(); });
-	doneMutex.lock();
-	while(!started) { };
-}
-
-WorkerThread::~WorkerThread() {
-	mutex.lock();
-	active = false;
-	signal.notify_one();
-	mutex.unlock();
-	thread->join();
-	delete thread;
-}
-
-void WorkerThread::Process(const std::function<void()>& work) {
-	mutex.lock();
-	work_ = work;
-	signal.notify_one();
-	mutex.unlock();
-}
-
-void WorkerThread::WaitForCompletion() {
-	done.wait(doneMutex);
-}
-
-void WorkerThread::WorkFunc() {
-	mutex.lock();
-	started = true;
-	while(active) {
-		signal.wait(mutex);
-		if(active) work_();
-		doneMutex.lock();
-		done.notify_one();
-		doneMutex.unlock();
-	}
-}
-
-
-TextureScaler::TextureScaler() : numThreads(4), workersStarted(false) {
-}
-
-void TextureScaler::StartWorkers() {
-	if(!workersStarted) {
-		for(int i=0; i<numThreads; ++i) {
-			workers.push_back(std::make_shared<WorkerThread>());
-		}
-		workersStarted = true;
-	}
-}
-
-void TextureScaler::ParallelLoop(std::function<void(int,int)> loop, int lower, int upper) {
-	StartWorkers();
-	int range = upper-lower;
-	if(range >= numThreads*2) { // don't parallelize tiny loops
-		// could do slightly better load balancing for the generic case, 
-		// but doesn't matter since all our loops are power of 2
-		int chunk = range/numThreads; 
-		for(int s=lower, i=0; i<numThreads; s+=chunk, ++i) {
-			workers[i]->Process(std::bind(loop, s, std::min(s+chunk,upper)));
-		}
-		for(int i=0; i<numThreads; ++i) {
-			workers[i]->WaitForCompletion();
-		}
-	} else {
-		loop(lower, upper);
-	}
-}
-
+// Report the time and throughput for each larger scaling operation in the log
 //#define SCALING_MEASURE_TIME

 #ifdef SCALING_MEASURE_TIME
 #include "native/base/timeutil.h"
 #endif

+
+TextureScaler::TextureScaler() {
+}
+
 void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
 	if(g_Config.iXBRZTexScalingLevel > 1) {
 		#ifdef SCALING_MEASURE_TIME
@@ -119,7 +56,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
 			break;

 		case GL_UNSIGNED_SHORT_4_4_4_4:
-			ParallelLoop([&](int l, int u){
+			GlobalThreadPool::Loop([&](int l, int u){
 				for(int y = l; y < u; ++y) {
 					for(int x = 0; x < width; ++x) {
 						u32 val = ((u16*)data)[y*width + x];
@@ -134,7 +71,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
 			break;

 		case GL_UNSIGNED_SHORT_5_6_5:
-			ParallelLoop([&](int l, int u){
+			GlobalThreadPool::Loop([&](int l, int u){
 				for(int y = l; y < u; ++y) {
 					for(int x = 0; x < width; ++x) {
 						u32 val = ((u16*)data)[y*width + x];
@@ -148,7 +85,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
 			break;

 		case GL_UNSIGNED_SHORT_5_5_5_1:
-			ParallelLoop([&](int l, int u) {
+			GlobalThreadPool::Loop([&](int l, int u) {
 				for(int y = l; y < u; ++y) {
 					for(int x = 0; x < width; ++x) {
 						u32 val = ((u16*)data)[y*width + x];
@@ -168,7 +105,7 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {

 		// scale 
 		xbrz::ScalerCfg cfg;
-		ParallelLoop([&](int l, int u) {
+		GlobalThreadPool::Loop([&](int l, int u) {
 			xbrz::scale(factor, xbrzInputBuf, xbrzBuf, width, height, cfg, l, u);
 		}, 0, height);

--- a/GPU/GLES/TextureScaler.h
+++ b/GPU/GLES/TextureScaler.h
@@ -24,34 +24,6 @@
 #include <functional>
 #include <vector>

-#include "native/thread/thread.h"
-#include "base/mutex.h"
-
-// This is the simplest possible worker implementation I can think of
-// but entirely sufficient for the given purpose.
-// Only handles a single item of work at a time.
-class WorkerThread {
-public:
-	WorkerThread();
-	~WorkerThread();
-
-	// submit a new work item
-	void Process(const std::function<void()>& work);
-	// wait for a submitted work item to be completed
-	void WaitForCompletion();
-
-private:
-	std::thread *thread; // the worker thread
-	condition_variable signal; // used to signal new work
-	condition_variable done; // used to signal work completion
-	recursive_mutex mutex, doneMutex; // associated with each respective condition variable
-	volatile bool active, started;
-	std::function<void()> work_; // the work to be done by this thread
-
-	void WorkFunc();
-
-	WorkerThread(const WorkerThread& other) { } // prevent copies
-};

 class TextureScaler {
 public:
@@ -60,14 +32,6 @@ public:
 	void Scale(u32* &data, GLenum &dstfmt, int &width, int &height);

 private:
-	const int numThreads;
-	std::vector<std::shared_ptr<WorkerThread>> workers;
-
-	bool workersStarted;
-	void StartWorkers();
-
-	void ParallelLoop(std::function<void(int,int)> loop, int lower, int upper);
-
 	SimpleBuf<u32> bufInput;
 	SimpleBuf<u32> bufOutput;
 };