mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #14383 from unknownbrackets/threadpool
Use threads more on startup to improve load time
This commit is contained in:
commit
8e953b2a97
12 changed files with 233 additions and 161 deletions
|
@ -1,3 +1,5 @@
|
|||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include "Common/Thread/ThreadPool.h"
|
||||
#include "Common/Thread/ThreadUtil.h"
|
||||
|
||||
|
@ -54,43 +56,27 @@ void WorkerThread::WorkFunc() {
|
|||
}
|
||||
}
|
||||
|
||||
void LoopWorkerThread::Process(std::function<void(int, int)> work, int start, int end) {
|
||||
void LoopWorkerThread::ProcessLoop(std::function<void(int, int)> work, int start, int end) {
|
||||
std::lock_guard<std::mutex> guard(mutex);
|
||||
work_ = std::move(work);
|
||||
loopWork_ = std::move(work);
|
||||
work_ = [this]() {
|
||||
loopWork_(start_, end_);
|
||||
};
|
||||
start_ = start;
|
||||
end_ = end;
|
||||
jobsTarget = jobsDone + 1;
|
||||
signal.notify_one();
|
||||
}
|
||||
|
||||
void LoopWorkerThread::WorkFunc() {
|
||||
setCurrentThreadName("LoopWorker");
|
||||
std::unique_lock<std::mutex> guard(mutex);
|
||||
while (active) {
|
||||
// 'active == false' is one of the conditions for signaling,
|
||||
// do not "optimize" it
|
||||
while (active && jobsTarget <= jobsDone) {
|
||||
signal.wait(guard);
|
||||
}
|
||||
if (active) {
|
||||
work_(start_, end_);
|
||||
|
||||
std::lock_guard<std::mutex> doneGuard(doneMutex);
|
||||
jobsDone++;
|
||||
done.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////// ThreadPool
|
||||
|
||||
ThreadPool::ThreadPool(int numThreads) {
|
||||
if (numThreads <= 0) {
|
||||
numThreads_ = 1;
|
||||
INFO_LOG(JIT, "ThreadPool: Bad number of threads %d", numThreads);
|
||||
} else if (numThreads > 8) {
|
||||
INFO_LOG(JIT, "ThreadPool: Capping number of threads to 8 (was %d)", numThreads);
|
||||
numThreads_ = 8;
|
||||
} else if (numThreads > 16) {
|
||||
INFO_LOG(JIT, "ThreadPool: Capping number of threads to 16 (was %d)", numThreads);
|
||||
numThreads_ = 16;
|
||||
} else {
|
||||
numThreads_ = numThreads;
|
||||
}
|
||||
|
@ -108,23 +94,32 @@ void ThreadPool::StartWorkers() {
|
|||
}
|
||||
}
|
||||
|
||||
void ThreadPool::ParallelLoop(const std::function<void(int,int)> &loop, int lower, int upper) {
|
||||
void ThreadPool::ParallelLoop(const std::function<void(int,int)> &loop, int lower, int upper, int minSize) {
|
||||
// Don't parallelize tiny loops.
|
||||
if (minSize == -1)
|
||||
minSize = 4;
|
||||
|
||||
int range = upper - lower;
|
||||
if (range >= numThreads_ * 2) { // don't parallelize tiny loops (this could be better, maybe add optional parameter that estimates work per iteration)
|
||||
if (range >= minSize) {
|
||||
std::lock_guard<std::mutex> guard(mutex);
|
||||
StartWorkers();
|
||||
|
||||
// could do slightly better load balancing for the generic case,
|
||||
// but doesn't matter since all our loops are power of 2
|
||||
int chunk = range / numThreads_;
|
||||
int chunk = std::max(minSize, range / numThreads_);
|
||||
int s = lower;
|
||||
for (auto& worker : workers) {
|
||||
worker->Process(loop, s, s+chunk);
|
||||
s+=chunk;
|
||||
for (auto &worker : workers) {
|
||||
// We'll do the last chunk on the current thread.
|
||||
if (s + chunk >= upper) {
|
||||
break;
|
||||
}
|
||||
worker->ProcessLoop(loop, s, s + chunk);
|
||||
s += chunk;
|
||||
}
|
||||
// This is the final chunk.
|
||||
loop(s, upper);
|
||||
for (auto& worker : workers) {
|
||||
if (s < upper)
|
||||
loop(s, upper);
|
||||
for (auto &worker : workers) {
|
||||
worker->WaitForCompletion();
|
||||
}
|
||||
} else {
|
||||
|
@ -132,3 +127,16 @@ void ThreadPool::ParallelLoop(const std::function<void(int,int)> &loop, int lowe
|
|||
}
|
||||
}
|
||||
|
||||
// Copies `size` bytes from `src` to `dest` by handing the byte range
// [0, size) to ParallelLoop; every sub-range it produces is copied with
// memmove. 128 KiB is passed as ParallelLoop's minSize argument.
// NOTE(review): sub-ranges may be handled by different workers, so overlapping
// src/dest is presumably not handled the way one big memmove would - confirm
// against callers.
void ThreadPool::ParallelMemcpy(void *dest, const void *src, int size) {
	static const int kMinChunkBytes = 128 * 1024;

	uint8_t *const dstBytes = static_cast<uint8_t *>(dest);
	const uint8_t *const srcBytes = static_cast<const uint8_t *>(src);

	const auto copyRange = [dstBytes, srcBytes](int lo, int hi) {
		memmove(dstBytes + lo, srcBytes + lo, hi - lo);
	};
	ParallelLoop(copyRange, 0, size, kMinChunkBytes);
}
|
||||
|
||||
// Fills `size` bytes starting at `dest` with `val` by handing the byte range
// [0, size) to ParallelLoop; every sub-range it produces is filled with
// memset. 128 KiB is passed as ParallelLoop's minSize argument.
void ThreadPool::ParallelMemset(void *dest, uint8_t val, int size) {
	static const int kMinChunkBytes = 128 * 1024;

	uint8_t *const dstBytes = static_cast<uint8_t *>(dest);

	const auto fillRange = [dstBytes, val](int lo, int hi) {
		memset(dstBytes + lo, val, hi - lo);
	};
	ParallelLoop(fillRange, 0, size, kMinChunkBytes);
}
|
||||
|
|
|
@ -23,6 +23,8 @@ public:
|
|||
void WaitForCompletion();
|
||||
|
||||
protected:
|
||||
virtual void WorkFunc();
|
||||
|
||||
std::thread thread; // the worker thread
|
||||
std::condition_variable signal; // used to signal new work
|
||||
std::condition_variable done; // used to signal work completion
|
||||
|
@ -30,11 +32,10 @@ protected:
|
|||
bool active = true;
|
||||
int jobsDone = 0;
|
||||
int jobsTarget = 0;
|
||||
private:
|
||||
virtual void WorkFunc();
|
||||
|
||||
std::function<void()> work_; // the work to be done by this thread
|
||||
|
||||
private:
|
||||
WorkerThread(const WorkerThread& other) = delete; // prevent copies
|
||||
void operator =(const WorkerThread &other) = delete;
|
||||
};
|
||||
|
@ -42,14 +43,12 @@ private:
|
|||
class LoopWorkerThread final : public WorkerThread {
|
||||
public:
|
||||
LoopWorkerThread() = default;
|
||||
void Process(std::function<void(int, int)> work, int start, int end);
|
||||
void ProcessLoop(std::function<void(int, int)> work, int start, int end);
|
||||
|
||||
private:
|
||||
virtual void WorkFunc() override;
|
||||
|
||||
int start_;
|
||||
int end_;
|
||||
std::function<void(int, int)> work_; // the work to be done by this thread
|
||||
std::function<void(int, int)> loopWork_; // the work to be done by this thread
|
||||
};
|
||||
|
||||
// A thread pool manages a set of worker threads, and allows the execution of parallel loops on them
|
||||
|
@ -61,7 +60,9 @@ public:
|
|||
// don't need a destructor, "workers" is cleared on delete,
|
||||
// leading to the stopping and joining of all worker threads (RAII and all that)
|
||||
|
||||
void ParallelLoop(const std::function<void(int,int)> &loop, int lower, int upper);
|
||||
void ParallelLoop(const std::function<void(int,int)> &loop, int lower, int upper, int minSize);
|
||||
void ParallelMemcpy(void *dest, const void *src, int sz);
|
||||
void ParallelMemset(void *dest, uint8_t val, int sz);
|
||||
|
||||
private:
|
||||
int numThreads_;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/ThreadPools.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
#include "Core/ELF/ElfReader.h"
|
||||
#include "Core/Debugger/MemBlockInfo.h"
|
||||
|
@ -57,86 +58,83 @@ bool ElfReader::LoadRelocations(const Elf32_Rel *rels, int numRelocs)
|
|||
{
|
||||
int numErrors = 0;
|
||||
DEBUG_LOG(LOADER, "Loading %i relocations...", numRelocs);
|
||||
for (int r = 0; r < numRelocs; r++)
|
||||
{
|
||||
// INFO_LOG(LOADER, "Loading reloc %i (%p)...", r, rels + r);
|
||||
u32 info = rels[r].r_info;
|
||||
u32 addr = rels[r].r_offset;
|
||||
GlobalThreadPool::Loop([&](int l, int h) {
|
||||
for (int r = l; r < h; r++) {
|
||||
VERBOSE_LOG(LOADER, "Loading reloc %i (%p)...", r, rels + r);
|
||||
u32 info = rels[r].r_info;
|
||||
u32 addr = rels[r].r_offset;
|
||||
|
||||
int type = info & 0xf;
|
||||
int type = info & 0xf;
|
||||
|
||||
int readwrite = (info>>8) & 0xff;
|
||||
int relative = (info>>16) & 0xff;
|
||||
int readwrite = (info >> 8) & 0xff;
|
||||
int relative = (info >> 16) & 0xff;
|
||||
|
||||
//0 = code
|
||||
//1 = data
|
||||
//0 = code
|
||||
//1 = data
|
||||
|
||||
if (readwrite >= (int)ARRAY_SIZE(segmentVAddr)) {
|
||||
if (numErrors < 10) {
|
||||
ERROR_LOG_REPORT(LOADER, "Bad segment number %i", readwrite);
|
||||
if (readwrite >= (int)ARRAY_SIZE(segmentVAddr)) {
|
||||
if (numErrors < 10) {
|
||||
ERROR_LOG_REPORT(LOADER, "Bad segment number %i", readwrite);
|
||||
}
|
||||
numErrors++;
|
||||
continue;
|
||||
}
|
||||
numErrors++;
|
||||
continue;
|
||||
}
|
||||
|
||||
addr += segmentVAddr[readwrite];
|
||||
addr += segmentVAddr[readwrite];
|
||||
|
||||
// It appears that misaligned relocations are allowed.
|
||||
// Will they work correctly on big-endian?
|
||||
// It appears that misaligned relocations are allowed.
|
||||
// Will they work correctly on big-endian?
|
||||
|
||||
if (((addr & 3) && type != R_MIPS_32) || !Memory::IsValidAddress(addr)) {
|
||||
if (numErrors < 10) {
|
||||
WARN_LOG_REPORT(LOADER, "Suspicious address %08x, skipping reloc, type = %d", addr, type);
|
||||
} else if (numErrors == 10) {
|
||||
WARN_LOG(LOADER, "Too many bad relocations, skipping logging");
|
||||
if (((addr & 3) && type != R_MIPS_32) || !Memory::IsValidAddress(addr)) {
|
||||
if (numErrors < 10) {
|
||||
WARN_LOG_REPORT(LOADER, "Suspicious address %08x, skipping reloc, type = %d", addr, type);
|
||||
} else if (numErrors == 10) {
|
||||
WARN_LOG(LOADER, "Too many bad relocations, skipping logging");
|
||||
}
|
||||
numErrors++;
|
||||
continue;
|
||||
}
|
||||
numErrors++;
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 op = Memory::Read_Instruction(addr, true).encoding;
|
||||
u32 op = Memory::ReadUnchecked_Instruction(addr, true).encoding;
|
||||
|
||||
const bool log = false;
|
||||
//log=true;
|
||||
if (log) {
|
||||
DEBUG_LOG(LOADER,"rel at: %08x info: %08x type: %i",addr, info, type);
|
||||
}
|
||||
u32 relocateTo = segmentVAddr[relative];
|
||||
const bool log = false;
|
||||
//log=true;
|
||||
if (log) {
|
||||
DEBUG_LOG(LOADER, "rel at: %08x info: %08x type: %i", addr, info, type);
|
||||
}
|
||||
u32 relocateTo = segmentVAddr[relative];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case R_MIPS_32:
|
||||
if (log)
|
||||
DEBUG_LOG(LOADER,"Full address reloc %08x", addr);
|
||||
//full address, no problemo
|
||||
op += relocateTo;
|
||||
break;
|
||||
switch (type) {
|
||||
case R_MIPS_32:
|
||||
if (log)
|
||||
DEBUG_LOG(LOADER, "Full address reloc %08x", addr);
|
||||
//full address, no problemo
|
||||
op += relocateTo;
|
||||
break;
|
||||
|
||||
case R_MIPS_26: //j, jal
|
||||
//add on to put in correct address space
|
||||
if (log)
|
||||
DEBUG_LOG(LOADER,"j/jal reloc %08x", addr);
|
||||
op = (op & 0xFC000000) | (((op&0x03FFFFFF)+(relocateTo>>2))&0x03FFFFFF);
|
||||
break;
|
||||
case R_MIPS_26: //j, jal
|
||||
//add on to put in correct address space
|
||||
if (log)
|
||||
DEBUG_LOG(LOADER, "j/jal reloc %08x", addr);
|
||||
op = (op & 0xFC000000) | (((op & 0x03FFFFFF) + (relocateTo >> 2)) & 0x03FFFFFF);
|
||||
break;
|
||||
|
||||
case R_MIPS_HI16: //lui part of lui-addiu pairs
|
||||
case R_MIPS_HI16: //lui part of lui-addiu pairs
|
||||
{
|
||||
if (log)
|
||||
DEBUG_LOG(LOADER,"HI reloc %08x", addr);
|
||||
DEBUG_LOG(LOADER, "HI reloc %08x", addr);
|
||||
|
||||
u32 cur = (op & 0xFFFF) << 16;
|
||||
u16 hi = 0;
|
||||
bool found = false;
|
||||
for (int t = r + 1; t<numRelocs; t++)
|
||||
{
|
||||
if ((rels[t].r_info & 0xF) == R_MIPS_LO16)
|
||||
{
|
||||
for (int t = r + 1; t < numRelocs; t++) {
|
||||
if ((rels[t].r_info & 0xF) == R_MIPS_LO16) {
|
||||
u32 corrLoAddr = rels[t].r_offset + segmentVAddr[readwrite];
|
||||
if (log) {
|
||||
DEBUG_LOG(LOADER,"Corresponding lo found at %08x", corrLoAddr);
|
||||
DEBUG_LOG(LOADER, "Corresponding lo found at %08x", corrLoAddr);
|
||||
}
|
||||
if (Memory::IsValidAddress(corrLoAddr)) {
|
||||
s16 lo = (s16)Memory::ReadUnchecked_U16(corrLoAddr);
|
||||
s16 lo = (s16)Memory::ReadUnchecked_Instruction(corrLoAddr, true).encoding;
|
||||
cur += lo;
|
||||
cur += relocateTo;
|
||||
addrToHiLo(cur, hi, lo);
|
||||
|
@ -150,14 +148,14 @@ bool ElfReader::LoadRelocations(const Elf32_Rel *rels, int numRelocs)
|
|||
if (!found) {
|
||||
ERROR_LOG_REPORT(LOADER, "R_MIPS_HI16: could not find R_MIPS_LO16");
|
||||
}
|
||||
op = (op & 0xFFFF0000) | (hi);
|
||||
op = (op & 0xFFFF0000) | hi;
|
||||
}
|
||||
break;
|
||||
|
||||
case R_MIPS_LO16: //addiu part of lui-addiu pairs
|
||||
case R_MIPS_LO16: //addiu part of lui-addiu pairs
|
||||
{
|
||||
if (log)
|
||||
DEBUG_LOG(LOADER,"LO reloc %08x", addr);
|
||||
DEBUG_LOG(LOADER, "LO reloc %08x", addr);
|
||||
u32 cur = op & 0xFFFF;
|
||||
cur += relocateTo;
|
||||
cur &= 0xFFFF;
|
||||
|
@ -165,29 +163,32 @@ bool ElfReader::LoadRelocations(const Elf32_Rel *rels, int numRelocs)
|
|||
}
|
||||
break;
|
||||
|
||||
case R_MIPS_GPREL16: //gp
|
||||
// It seems safe to ignore this, almost a notification of a gp-relative operation?
|
||||
break;
|
||||
case R_MIPS_GPREL16: //gp
|
||||
// It seems safe to ignore this, almost a notification of a gp-relative operation?
|
||||
break;
|
||||
|
||||
case R_MIPS_16:
|
||||
op = (op & 0xFFFF0000) | (((int)(op & 0xFFFF) + (int)relocateTo) & 0xFFFF);
|
||||
break;
|
||||
case R_MIPS_16:
|
||||
op = (op & 0xFFFF0000) | (((int)(op & 0xFFFF) + (int)relocateTo) & 0xFFFF);
|
||||
break;
|
||||
|
||||
case R_MIPS_NONE:
|
||||
// This shouldn't matter, not sure the purpose of it.
|
||||
break;
|
||||
case R_MIPS_NONE:
|
||||
// This shouldn't matter, not sure the purpose of it.
|
||||
break;
|
||||
|
||||
default:
|
||||
default:
|
||||
{
|
||||
char temp[256];
|
||||
MIPSDisAsm(MIPSOpcode(op), 0, temp);
|
||||
ERROR_LOG_REPORT(LOADER,"ARGH IT'S AN UNKNOWN RELOCATION!!!!!!!! %08x, type=%d : %s", addr, type, temp);
|
||||
ERROR_LOG_REPORT(LOADER, "ARGH IT'S AN UNKNOWN RELOCATION!!!!!!!! %08x, type=%d : %s", addr, type, temp);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
Memory::WriteUnchecked_U32(op, addr);
|
||||
NotifyMemInfo(MemBlockFlags::WRITE, addr, 4, "Relocation");
|
||||
}
|
||||
Memory::Write_U32(op, addr);
|
||||
NotifyMemInfo(MemBlockFlags::WRITE, addr, 4, "Relocation");
|
||||
}
|
||||
}, 0, numRelocs, 32);
|
||||
|
||||
if (numErrors) {
|
||||
WARN_LOG(LOADER, "%i bad relocations found!!!", numErrors);
|
||||
}
|
||||
|
|
|
@ -774,6 +774,7 @@ void __IoShutdown() {
|
|||
delete flash0System;
|
||||
flash0System = nullptr;
|
||||
|
||||
MemoryStick_Shutdown();
|
||||
memStickCallbacks.clear();
|
||||
memStickFatCallbacks.clear();
|
||||
}
|
||||
|
|
|
@ -24,10 +24,11 @@
|
|||
#include "Core/Debugger/MemBlockInfo.h"
|
||||
#include "Core/HLE/HLE.h"
|
||||
#include "Core/HLE/FunctionWrappers.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MemMapHelpers.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/ThreadPools.h"
|
||||
#include "Common/Serialize/Serializer.h"
|
||||
#include "Common/Serialize/SerializeFuncs.h"
|
||||
#include "Common/Serialize/SerializeMap.h"
|
||||
|
@ -430,8 +431,8 @@ void __KernelMemoryInit()
|
|||
MemBlockInfoInit();
|
||||
kernelMemory.Init(PSP_GetKernelMemoryBase(), PSP_GetKernelMemoryEnd() - PSP_GetKernelMemoryBase(), false);
|
||||
userMemory.Init(PSP_GetUserMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetUserMemoryBase(), false);
|
||||
Memory::Memset(PSP_GetKernelMemoryBase(), 0, PSP_GetKernelMemoryEnd() - PSP_GetKernelMemoryBase(), "MemInit");
|
||||
Memory::Memset(PSP_GetUserMemoryBase(), 0, PSP_GetUserMemoryEnd() - PSP_GetUserMemoryBase(), "MemInit");
|
||||
GlobalThreadPool::Memset(Memory::GetPointer(PSP_GetKernelMemoryBase()), 0, PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase());
|
||||
NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetKernelMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase(), "MemInit");
|
||||
INFO_LOG(SCEKERNEL, "Kernel and user memory pools initialized");
|
||||
|
||||
vplWaitTimer = CoreTiming::RegisterEvent("VplTimeout", __KernelVplTimeout);
|
||||
|
|
|
@ -863,9 +863,11 @@ void PSPModule::Cleanup() {
|
|||
|
||||
if (memoryBlockAddr != 0 && nm.text_addr != 0 && memoryBlockSize >= nm.data_size + nm.bss_size + nm.text_size) {
|
||||
DEBUG_LOG(LOADER, "Zeroing out module %s memory: %08x - %08x", nm.name, memoryBlockAddr, memoryBlockAddr + memoryBlockSize);
|
||||
for (u32 i = 0; i < (u32)(nm.text_size + 3); i += 4) {
|
||||
Memory::Write_U32(MIPS_MAKE_BREAK(1), nm.text_addr + i);
|
||||
u32 clearSize = Memory::ValidSize(nm.text_addr, (u32)nm.text_size + 3);
|
||||
for (u32 i = 0; i < clearSize; i += 4) {
|
||||
Memory::WriteUnchecked_U32(MIPS_MAKE_BREAK(1), nm.text_addr + i);
|
||||
}
|
||||
NotifyMemInfo(MemBlockFlags::WRITE, nm.text_addr, clearSize, "ModuleClear");
|
||||
Memory::Memset(nm.text_addr + nm.text_size, -1, nm.data_size + nm.bss_size, "ModuleClear");
|
||||
|
||||
// Let's also invalidate, just to make sure it's cleared out for any future data.
|
||||
|
@ -1268,7 +1270,7 @@ static PSPModule *__KernelLoadELFFromPtr(const u8 *ptr, size_t elfSize, u32 load
|
|||
ElfReader reader((void*)ptr, elfSize);
|
||||
|
||||
int result = reader.LoadInto(loadAddress, fromTop);
|
||||
if (result != SCE_KERNEL_ERROR_OK) {
|
||||
if (result != SCE_KERNEL_ERROR_OK) {
|
||||
ERROR_LOG(SCEMODULE, "LoadInto failed with error %08x",result);
|
||||
if (newptr)
|
||||
delete [] newptr;
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <algorithm>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include "Common/Serialize/Serializer.h"
|
||||
#include "Common/Serialize/SerializeFuncs.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
|
@ -31,8 +34,20 @@ static bool memStickNeedsAssign = false;
|
|||
static u64 memStickInsertedAt = 0;
|
||||
static uint64_t memstickInitialFree = 0;
|
||||
|
||||
const u64 normalMemstickSize = 9ULL * 1024 * 1024 * 1024;
|
||||
const u64 smallMemstickSize = 1ULL * 1024 * 1024 * 1024;
|
||||
// State of the background computation that fills in memstickInitialFree.
enum FreeCalcStatus {
	// Initial value of freeCalcStatus.
	NONE = 0,
	// Set by MemoryStick_CalcInitialFree() right before it launches the thread.
	RUNNING = 1,
	// Set by the worker thread once memstickInitialFree has been stored.
	DONE = 2,
	// Set by MemoryStick_WaitInitialFree() after it has finished waiting
	// (and joined the thread, if it had reached DONE).
	CLEANED_UP = 3,
};
|
||||
|
||||
static std::thread freeCalcThread;
|
||||
static std::condition_variable freeCalcCond;
|
||||
static std::mutex freeCalcMutex;
|
||||
static FreeCalcStatus freeCalcStatus = FreeCalcStatus::NONE;
|
||||
|
||||
static const u64 normalMemstickSize = 9ULL * 1024 * 1024 * 1024;
|
||||
static const u64 smallMemstickSize = 1ULL * 1024 * 1024 * 1024;
|
||||
|
||||
void MemoryStick_DoState(PointerWrap &p) {
|
||||
auto s = p.Section("MemoryStick", 1, 5);
|
||||
|
@ -75,7 +90,31 @@ u64 MemoryStick_SectorSize() {
|
|||
return 32 * 1024; // 32KB
|
||||
}
|
||||
|
||||
static void MemoryStick_CalcInitialFree() {
|
||||
std::unique_lock<std::mutex> guard(freeCalcMutex);
|
||||
freeCalcStatus = FreeCalcStatus::RUNNING;
|
||||
freeCalcThread = std::thread([] {
|
||||
memstickInitialFree = pspFileSystem.FreeSpace("ms0:/") + pspFileSystem.getDirSize("ms0:/PSP/SAVEDATA/");
|
||||
|
||||
std::unique_lock<std::mutex> guard(freeCalcMutex);
|
||||
freeCalcStatus = FreeCalcStatus::DONE;
|
||||
freeCalcCond.notify_all();
|
||||
});
|
||||
}
|
||||
|
||||
static void MemoryStick_WaitInitialFree() {
|
||||
std::unique_lock<std::mutex> guard(freeCalcMutex);
|
||||
while (freeCalcStatus == FreeCalcStatus::RUNNING) {
|
||||
freeCalcCond.wait(guard);
|
||||
}
|
||||
if (freeCalcStatus == FreeCalcStatus::DONE)
|
||||
freeCalcThread.join();
|
||||
freeCalcStatus = FreeCalcStatus::CLEANED_UP;
|
||||
}
|
||||
|
||||
u64 MemoryStick_FreeSpace() {
|
||||
MemoryStick_WaitInitialFree();
|
||||
|
||||
const CompatFlags &flags = PSP_CoreParameter().compat.flags();
|
||||
u64 realFreeSpace = pspFileSystem.FreeSpace("ms0:/");
|
||||
|
||||
|
@ -135,5 +174,9 @@ void MemoryStick_Init() {
|
|||
}
|
||||
|
||||
memStickNeedsAssign = false;
|
||||
memstickInitialFree = pspFileSystem.FreeSpace("ms0:/") + pspFileSystem.getDirSize("ms0:/PSP/SAVEDATA/");
|
||||
MemoryStick_CalcInitialFree();
|
||||
}
|
||||
|
||||
// Shutdown hook for the memory stick module: waits for (and cleans up) the
// background initial-free-space computation via MemoryStick_WaitInitialFree().
void MemoryStick_Shutdown()
{
	MemoryStick_WaitInitialFree();
}
|
||||
|
|
|
@ -41,6 +41,7 @@ enum MemStickDriverState {
|
|||
};
|
||||
|
||||
void MemoryStick_Init();
|
||||
void MemoryStick_Shutdown();
|
||||
void MemoryStick_DoState(PointerWrap &p);
|
||||
MemStickState MemoryStick_State();
|
||||
MemStickFatState MemoryStick_FatState();
|
||||
|
|
|
@ -324,14 +324,10 @@ static void DoMemoryVoid(PointerWrap &p, uint32_t start, uint32_t size) {
|
|||
|
||||
switch (p.mode) {
|
||||
case PointerWrap::MODE_READ:
|
||||
GlobalThreadPool::Loop([&](int l, int h) {
|
||||
memmove(d + l, storage + l, h - l);
|
||||
}, 0, size);
|
||||
GlobalThreadPool::Memcpy(d, storage, size);
|
||||
break;
|
||||
case PointerWrap::MODE_WRITE:
|
||||
GlobalThreadPool::Loop([&](int l, int h) {
|
||||
memmove(storage + l, d + l, h - l);
|
||||
}, 0, size);
|
||||
GlobalThreadPool::Memcpy(storage, d, size);
|
||||
break;
|
||||
case PointerWrap::MODE_MEASURE:
|
||||
// Nothing to do here.
|
||||
|
@ -395,15 +391,6 @@ void Shutdown() {
|
|||
DEBUG_LOG(MEMMAP, "Memory system shut down.");
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
if (m_pPhysicalRAM)
|
||||
memset(GetPointerUnchecked(PSP_GetKernelMemoryBase()), 0, g_MemorySize);
|
||||
if (m_pPhysicalScratchPad)
|
||||
memset(m_pPhysicalScratchPad, 0, SCRATCHPAD_SIZE);
|
||||
if (m_pPhysicalVRAM1)
|
||||
memset(m_pPhysicalVRAM1, 0, VRAM_SIZE);
|
||||
}
|
||||
|
||||
bool IsActive() {
|
||||
return base != nullptr;
|
||||
}
|
||||
|
|
|
@ -6,9 +6,19 @@
|
|||
std::unique_ptr<ThreadPool> GlobalThreadPool::pool;
|
||||
std::once_flag GlobalThreadPool::init_flag;
|
||||
|
||||
void GlobalThreadPool::Loop(const std::function<void(int,int)>& loop, int lower, int upper) {
|
||||
void GlobalThreadPool::Loop(const std::function<void(int,int)>& loop, int lower, int upper, int minSize) {
|
||||
std::call_once(init_flag, Inititialize);
|
||||
pool->ParallelLoop(loop, lower, upper);
|
||||
pool->ParallelLoop(loop, lower, upper, minSize);
|
||||
}
|
||||
|
||||
void GlobalThreadPool::Memcpy(void *dest, const void *src, int size) {
|
||||
std::call_once(init_flag, Inititialize);
|
||||
pool->ParallelMemcpy(dest, src, size);
|
||||
}
|
||||
|
||||
void GlobalThreadPool::Memset(void *dest, uint8_t val, int size) {
|
||||
std::call_once(init_flag, Inititialize);
|
||||
pool->ParallelMemset(dest, val, size);
|
||||
}
|
||||
|
||||
void GlobalThreadPool::Inititialize() {
|
||||
|
|
|
@ -6,7 +6,9 @@ class GlobalThreadPool {
|
|||
public:
|
||||
// will execute slices of "loop" from "lower" to "upper"
|
||||
// in parallel on the global thread pool
|
||||
static void Loop(const std::function<void(int,int)>& loop, int lower, int upper);
|
||||
static void Loop(const std::function<void(int,int)>& loop, int lower, int upper, int minSize = -1);
|
||||
static void Memcpy(void *dest, const void *src, int size);
|
||||
static void Memset(void *dest, uint8_t val, int size);
|
||||
|
||||
private:
|
||||
static std::unique_ptr<ThreadPool> pool;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <set>
|
||||
|
@ -32,6 +33,7 @@
|
|||
#include "Core/HLE/sceDisplay.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/ThreadPools.h"
|
||||
#include "GPU/GPUInterface.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
|
@ -166,34 +168,47 @@ static const u8 *mymemmem(const u8 *haystack, size_t off, size_t hlen, const u8
|
|||
}
|
||||
|
||||
const u8 *last_possible = haystack + hlen - nlen;
|
||||
const u8 *first_possible = haystack + off;
|
||||
int first = *needle;
|
||||
const u8 *p = haystack + off;
|
||||
|
||||
const uintptr_t align_mask = align - 1;
|
||||
auto poffset = [&]() {
|
||||
return ((uintptr_t)(p - haystack) & align_mask);
|
||||
};
|
||||
auto alignp = [&]() {
|
||||
uintptr_t offset = poffset();
|
||||
if (offset != 0)
|
||||
p += align - offset;
|
||||
};
|
||||
const u8 *result = nullptr;
|
||||
std::mutex resultLock;
|
||||
|
||||
alignp();
|
||||
while (p <= last_possible) {
|
||||
p = (const u8 *)memchr(p, first, last_possible - p + 1);
|
||||
if (!p) {
|
||||
return nullptr;
|
||||
}
|
||||
if (poffset() == 0 && !memcmp(p, needle, nlen)) {
|
||||
return p;
|
||||
}
|
||||
int range = (int)(last_possible - first_possible);
|
||||
GlobalThreadPool::Loop([&](int l, int h) {
|
||||
const u8 *p = haystack + off + l;
|
||||
const u8 *pend = haystack + off + h;
|
||||
|
||||
const uintptr_t align_mask = align - 1;
|
||||
auto poffset = [&]() {
|
||||
return ((uintptr_t)(p - haystack) & align_mask);
|
||||
};
|
||||
auto alignp = [&]() {
|
||||
uintptr_t offset = poffset();
|
||||
if (offset != 0)
|
||||
p += align - offset;
|
||||
};
|
||||
|
||||
p++;
|
||||
alignp();
|
||||
}
|
||||
while (p <= pend) {
|
||||
p = (const u8 *)memchr(p, first, pend - p + 1);
|
||||
if (!p) {
|
||||
return;
|
||||
}
|
||||
if (poffset() == 0 && !memcmp(p, needle, nlen)) {
|
||||
std::lock_guard<std::mutex> guard(resultLock);
|
||||
// Take the lowest result so we get the same file for any # of threads.
|
||||
if (!result || p < result)
|
||||
result = p;
|
||||
return;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
p++;
|
||||
alignp();
|
||||
}
|
||||
}, 0, range, 128 * 1024);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 align) {
|
||||
|
|
Loading…
Add table
Reference in a new issue