mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #15316 from unknownbrackets/softgpu-binning
Throw some memory at the softgpu problem
This commit is contained in:
commit
06ae4d0577
5 changed files with 117 additions and 18 deletions
|
@ -20,6 +20,8 @@
|
|||
#include <mutex>
|
||||
#include "Common/Profiler/Profiler.h"
|
||||
#include "Common/Thread/ThreadManager.h"
|
||||
#include "Common/TimeUtil.h"
|
||||
#include "Core/System.h"
|
||||
#include "GPU/Software/BinManager.h"
|
||||
#include "GPU/Software/Rasterizer.h"
|
||||
#include "GPU/Software/RasterizerRectangle.h"
|
||||
|
@ -87,7 +89,7 @@ static inline void DrawBinItem(const BinItem &item, const RasterizerState &state
|
|||
|
||||
class DrawBinItemsTask : public Task {
|
||||
public:
|
||||
DrawBinItemsTask(BinWaitable *notify, BinQueue<BinItem, 1024> &items, std::atomic<bool> &status, const BinQueue<RasterizerState, 64> &states)
|
||||
DrawBinItemsTask(BinWaitable *notify, BinManager::BinItemQueue &items, std::atomic<bool> &status, const BinManager::BinStateQueue &states)
|
||||
: notify_(notify), items_(items), status_(status), states_(states) {
|
||||
}
|
||||
|
||||
|
@ -113,9 +115,9 @@ private:
|
|||
}
|
||||
|
||||
BinWaitable *notify_;
|
||||
BinQueue<BinItem, 1024> &items_;
|
||||
BinManager::BinItemQueue &items_;
|
||||
std::atomic<bool> &status_;
|
||||
const BinQueue<RasterizerState, 64> &states_;
|
||||
const BinManager::BinStateQueue &states_;
|
||||
};
|
||||
|
||||
BinManager::BinManager() {
|
||||
|
@ -127,6 +129,13 @@ BinManager::BinManager() {
|
|||
waitable_ = new BinWaitable();
|
||||
for (auto &s : taskStatus_)
|
||||
s = false;
|
||||
|
||||
int maxInitTasks = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);
|
||||
for (int i = 0; i < maxInitTasks; ++i)
|
||||
taskQueues_[i].Setup();
|
||||
states_.Setup();
|
||||
cluts_.Setup();
|
||||
queue_.Setup();
|
||||
}
|
||||
|
||||
BinManager::~BinManager() {
|
||||
|
@ -136,7 +145,7 @@ BinManager::~BinManager() {
|
|||
void BinManager::UpdateState() {
|
||||
PROFILE_THIS_SCOPE("bin_state");
|
||||
if (states_.Full())
|
||||
Flush();
|
||||
Flush("states");
|
||||
stateIndex_ = (int)states_.Push(RasterizerState());
|
||||
ComputeRasterizerState(&states_[stateIndex_]);
|
||||
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
|
||||
|
@ -165,12 +174,12 @@ void BinManager::UpdateState() {
|
|||
// We don't want to overlap wrong, so flush any pending.
|
||||
if (maxTasks_ != newMaxTasks) {
|
||||
maxTasks_ = newMaxTasks;
|
||||
Flush();
|
||||
Flush("selfrender");
|
||||
}
|
||||
|
||||
// Our bin sizes are based on offset, so if that changes we have to flush.
|
||||
if (queueOffsetX_ != gstate.getOffsetX16() || queueOffsetY_ != gstate.getOffsetY16()) {
|
||||
Flush();
|
||||
Flush("offset");
|
||||
queueOffsetX_ = gstate.getOffsetX16();
|
||||
queueOffsetY_ = gstate.getOffsetY16();
|
||||
}
|
||||
|
@ -179,7 +188,7 @@ void BinManager::UpdateState() {
|
|||
// Queues a private copy of the CLUT found at src and makes it the current one.
// src must point to at least sizeof(BinClut) readable bytes, since that many
// bytes are copied out of it.
void BinManager::UpdateClut(const void *src) {
	PROFILE_THIS_SCOPE("bin_clut");
	// Only flush when there is no room left for another CLUT; the reason string
	// is passed along so the flush is attributed to "cluts".
	if (cluts_.Full())
		Flush("cluts");
	clutIndex_ = (int)cluts_.Push(BinClut());
	memcpy(cluts_[clutIndex_].readable, src, sizeof(BinClut));
}
|
||||
|
@ -323,7 +332,10 @@ void BinManager::Drain() {
|
|||
}
|
||||
}
|
||||
|
||||
void BinManager::Flush() {
|
||||
void BinManager::Flush(const char *reason) {
|
||||
double st;
|
||||
if (coreCollectDebugStats)
|
||||
st = time_now_d();
|
||||
Drain();
|
||||
waitable_->Wait();
|
||||
taskRanges_.clear();
|
||||
|
@ -341,6 +353,59 @@ void BinManager::Flush() {
|
|||
queueRange_.y2 = 0;
|
||||
queueOffsetX_ = -1;
|
||||
queueOffsetY_ = -1;
|
||||
|
||||
if (coreCollectDebugStats) {
|
||||
double et = time_now_d();
|
||||
flushReasonTimes_[reason] += et - st;
|
||||
if (et - st > slowestFlushTime_) {
|
||||
slowestFlushTime_ = et - st;
|
||||
slowestFlushReason_ = reason;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BinManager::GetStats(char *buffer, size_t bufsize) {
|
||||
double allTotal = 0.0;
|
||||
double slowestTotalTime = 0.0;
|
||||
const char *slowestTotalReason = nullptr;
|
||||
for (auto &it : flushReasonTimes_) {
|
||||
if (it.second > slowestTotalTime) {
|
||||
slowestTotalTime = it.second;
|
||||
slowestTotalReason = it.first;
|
||||
}
|
||||
allTotal += it.second;
|
||||
}
|
||||
|
||||
// Many games are 30 FPS, so check last frame too for better stats.
|
||||
double recentTotal = allTotal;
|
||||
double slowestRecentTime = slowestTotalTime;
|
||||
const char *slowestRecentReason = slowestTotalReason;
|
||||
for (auto &it : lastFlushReasonTimes_) {
|
||||
if (it.second > slowestRecentTime) {
|
||||
slowestRecentTime = it.second;
|
||||
slowestRecentReason = it.first;
|
||||
}
|
||||
recentTotal += it.second;
|
||||
}
|
||||
|
||||
snprintf(buffer, bufsize,
|
||||
"Slowest individual flush: %s (%0.4f)\n"
|
||||
"Slowest frame flush: %s (%0.4f)\n"
|
||||
"Slowest recent flush: %s (%0.4f)\n"
|
||||
"Total flush time: %0.4f (%05.2f%%, last 2: %05.2f%%)\n",
|
||||
slowestFlushReason_, slowestFlushTime_,
|
||||
slowestTotalReason, slowestTotalTime,
|
||||
slowestRecentReason, slowestRecentTime,
|
||||
allTotal, allTotal * (6000.0 / 1.001), recentTotal * (3000.0 / 1.001));
|
||||
|
||||
constexpr int foo = sizeof(BinItem);
|
||||
}
|
||||
|
||||
// Starts a new stats period: the per-reason totals gathered so far become the
// "last" totals, and the current totals and slowest-flush record are cleared.
void BinManager::ResetStats() {
	// Forget the slowest single flush seen so far.
	slowestFlushTime_ = 0.0;
	slowestFlushReason_ = nullptr;

	// Keep the just-finished totals around for GetStats() to read.
	lastFlushReasonTimes_ = std::move(flushReasonTimes_);
	// A moved-from map is only in a valid but unspecified state, so empty it explicitly.
	flushReasonTimes_.clear();
}
|
||||
|
||||
inline BinCoords BinCoords::Intersect(const BinCoords &range) const {
|
||||
|
|
|
@ -18,10 +18,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <unordered_map>
|
||||
#include "Common/Log.h"
|
||||
#include "GPU/Software/Rasterizer.h"
|
||||
|
||||
struct BinWaitable;
|
||||
class DrawBinItemsTask;
|
||||
|
||||
enum class BinItemType {
|
||||
TRIANGLE,
|
||||
|
@ -56,13 +58,16 @@ struct BinItem {
|
|||
template <typename T, size_t N>
|
||||
struct BinQueue {
|
||||
BinQueue() {
|
||||
items_ = new T[N];
|
||||
Reset();
|
||||
}
|
||||
~BinQueue() {
|
||||
delete [] items_;
|
||||
}
|
||||
|
||||
void Setup() {
|
||||
items_ = new T[N];
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
head_ = 0;
|
||||
tail_ = 0;
|
||||
|
@ -166,17 +171,31 @@ public:
|
|||
void AddPoint(const VertexData &v0);
|
||||
|
||||
void Drain();
|
||||
void Flush();
|
||||
void Flush(const char *reason);
|
||||
|
||||
void GetStats(char *buffer, size_t bufsize);
|
||||
void ResetStats();
|
||||
|
||||
protected:
|
||||
static constexpr int MAX_POSSIBLE_TASKS = 64;
|
||||
// This is about 1MB of state data.
|
||||
static constexpr int QUEUED_STATES = 4096;
|
||||
// These are 1KB each, so half an MB.
|
||||
static constexpr int QUEUED_CLUTS = 512;
|
||||
// About 320 KB each, but we usually have 16 or fewer of them, so 5 MB - 20 MB.
|
||||
static constexpr int QUEUED_PRIMS = 1024;
|
||||
|
||||
typedef BinQueue<Rasterizer::RasterizerState, QUEUED_STATES> BinStateQueue;
|
||||
typedef BinQueue<BinClut, QUEUED_CLUTS> BinClutQueue;
|
||||
typedef BinQueue<BinItem, QUEUED_PRIMS> BinItemQueue;
|
||||
|
||||
private:
|
||||
static constexpr int MAX_POSSIBLE_TASKS = 64;
|
||||
|
||||
BinQueue<Rasterizer::RasterizerState, 64> states_;
|
||||
BinStateQueue states_;
|
||||
int stateIndex_;
|
||||
BinQueue<BinClut, 64> cluts_;
|
||||
BinClutQueue cluts_;
|
||||
int clutIndex_;
|
||||
BinCoords scissor_;
|
||||
BinQueue<BinItem, 1024> queue_;
|
||||
BinItemQueue queue_;
|
||||
BinCoords queueRange_;
|
||||
int queueOffsetX_ = -1;
|
||||
int queueOffsetY_ = -1;
|
||||
|
@ -184,13 +203,20 @@ private:
|
|||
int maxTasks_ = 1;
|
||||
bool tasksSplit_ = false;
|
||||
std::vector<BinCoords> taskRanges_;
|
||||
BinQueue<BinItem, 1024> taskQueues_[MAX_POSSIBLE_TASKS];
|
||||
BinItemQueue taskQueues_[MAX_POSSIBLE_TASKS];
|
||||
std::atomic<bool> taskStatus_[MAX_POSSIBLE_TASKS];
|
||||
BinWaitable *waitable_ = nullptr;
|
||||
|
||||
std::unordered_map<const char *, double> flushReasonTimes_;
|
||||
std::unordered_map<const char *, double> lastFlushReasonTimes_;
|
||||
const char *slowestFlushReason_ = nullptr;
|
||||
double slowestFlushTime_ = 0.0;
|
||||
|
||||
BinCoords Scissor(BinCoords range);
|
||||
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
||||
BinCoords Range(const VertexData &v0, const VertexData &v1);
|
||||
BinCoords Range(const VertexData &v0);
|
||||
void Expand(const BinCoords &range);
|
||||
|
||||
friend class DrawBinItemsTask;
|
||||
};
|
||||
|
|
|
@ -900,7 +900,7 @@ void SoftGPU::FinishDeferred() {
|
|||
}
|
||||
|
||||
// Fills buffer (up to bufsize bytes) with software renderer statistics by
// delegating to the draw engine's transform unit.
void SoftGPU::GetStats(char *buffer, size_t bufsize) {
	drawEngine_->transformUnit.GetStats(buffer, bufsize);
}
|
||||
|
||||
void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
|
||||
|
|
|
@ -609,10 +609,16 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
|||
}
|
||||
|
||||
// Flushes the binner, forwarding reason so the flush can be attributed to it,
// and then calls GPUDebug::NotifyDraw().
void TransformUnit::Flush(const char *reason) {
	binner_->Flush(reason);
	GPUDebug::NotifyDraw();
}
|
||||
|
||||
// Writes the binner's statistics into buffer (up to bufsize bytes).
// Reading the stats also resets them on the binner, so each call reports on
// what was gathered since the previous call.
void TransformUnit::GetStats(char *buffer, size_t bufsize) {
	// TODO: More stats?
	binner_->GetStats(buffer, bufsize);

	// Must come after GetStats(), which reads the values being reset here.
	binner_->ResetStats();
}
|
||||
|
||||
void TransformUnit::FlushIfOverlap(const char *reason, uint32_t addr, uint32_t sz) {
|
||||
if (!Memory::IsVRAMAddress(addr))
|
||||
return;
|
||||
|
|
|
@ -123,6 +123,8 @@ public:
|
|||
void FlushIfOverlap(const char *reason, uint32_t addr, uint32_t sz);
|
||||
void NotifyClutUpdate(const void *src);
|
||||
|
||||
void GetStats(char *buffer, size_t bufsize);
|
||||
|
||||
private:
|
||||
VertexData ReadVertex(VertexReader &vreader, bool &outside_range_flag);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue