mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #15873 from unknownbrackets/softgpu-xfer-hazard
softgpu: Flush on transfer to pending tex read
This commit is contained in:
commit
5f30c88e38
5 changed files with 69 additions and 8 deletions
|
@ -210,6 +210,9 @@ void BinManager::UpdateState() {
|
|||
}
|
||||
|
||||
if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
|
||||
// This is a good place to record any dependencies for block transfer overlap.
|
||||
MarkPendingReads(state);
|
||||
|
||||
// Disallow threads when rendering to the target, even offset.
|
||||
bool selfRender = HasTextureWrite(state);
|
||||
int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
|
||||
|
@ -251,6 +254,34 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Records, per texture base address, the byte footprint of every mip level the
// current state samples from.  HasPendingRead() later compares transfers against
// these entries so that a write into a still-pending texture forces a flush.
void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
	// Without texturing, nothing is sampled and there is nothing to track.
	if (!state.enableTextures)
		return;

	const uint8_t bitsPerTexel = textureBitsPerPixel[state.samplerID.texfmt];
	for (int level = 0; level <= state.maxTexLevel; ++level) {
		const uint32_t addr = state.texaddr[level];
		const auto &dims = state.samplerID.cached.sizes[level];

		uint32_t levelStride = (state.texbufw[level] * bitsPerTexel) / 8;
		uint32_t levelWidth = (dims.w * bitsPerTexel) / 8;
		uint32_t levelHeight = dims.h;

		auto found = pendingReads_.find(addr);
		if (found == pendingReads_.end()) {
			// First read seen at this address: store its extents as-is.
			auto &fresh = pendingReads_[addr];
			fresh.base = addr;
			fresh.strideBytes = levelStride;
			fresh.widthBytes = levelWidth;
			fresh.height = levelHeight;
			continue;
		}

		// Already tracked: only grow the entry when this read reaches further.
		auto &known = found->second;
		const uint32_t wanted = levelStride * (levelHeight - 1) + levelWidth;
		const uint32_t covered = known.strideBytes * (known.height - 1) + known.widthBytes;
		if (covered < wanted) {
			known.strideBytes = std::max(known.strideBytes, levelStride);
			known.widthBytes = std::max(known.widthBytes, levelWidth);
			known.height = std::max(known.height, levelHeight);
		}
	}
}
|
||||
|
||||
inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, DrawingCoords &tl, DrawingCoords &br) {
|
||||
const uint32_t w = br.x - tl.x + 1;
|
||||
const uint32_t h = br.y - tl.y + 1;
|
||||
|
@ -465,9 +496,10 @@ void BinManager::Flush(const char *reason) {
|
|||
for (auto &pending : pendingWrites_)
|
||||
pending.base = 0;
|
||||
pendingOverlap_ = false;
|
||||
pendingReads_.clear();
|
||||
|
||||
// We'll need to set the pending writes again, since we just flushed it.
|
||||
dirty_ |= SoftDirty::BINNER_RANGE;
|
||||
// We'll need to set the pending writes and reads again, since we just flushed it.
|
||||
dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;
|
||||
|
||||
if (coreCollectDebugStats) {
|
||||
double et = time_now_d();
|
||||
|
@ -486,7 +518,7 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
|
|||
// Ignore mirrors for overlap detection.
|
||||
start &= 0x0FFFFFFF & ~0x00600000;
|
||||
|
||||
uint32_t size = stride * h;
|
||||
uint32_t size = stride * (h - 1) + w;
|
||||
for (const auto &range : pendingWrites_) {
|
||||
if (range.base == 0 || range.strideBytes == 0)
|
||||
continue;
|
||||
|
@ -512,6 +544,28 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
|
|||
return false;
|
||||
}
|
||||
|
||||
bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
if (Memory::IsVRAMAddress(start)) {
|
||||
// Ignore VRAM mirrors.
|
||||
start &= 0x0FFFFFFF & ~0x00600000;
|
||||
} else {
|
||||
// Ignore only regular RAM mirrors.
|
||||
start &= 0x3FFFFFFF;
|
||||
}
|
||||
|
||||
uint32_t size = stride * (h - 1) + w;
|
||||
for (const auto &pair : pendingReads_) {
|
||||
const auto &range = pair.second;
|
||||
if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)
|
||||
continue;
|
||||
|
||||
// Stride gaps are uncommon with reads, so don't bother.
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void BinManager::GetStats(char *buffer, size_t bufsize) {
|
||||
double allTotal = 0.0;
|
||||
double slowestTotalTime = 0.0;
|
||||
|
|
|
@ -198,6 +198,8 @@ public:
|
|||
void Drain();
|
||||
void Flush(const char *reason);
|
||||
bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
|
||||
// Assumes you've also checked for a write (writes are partial so are automatically reads.)
|
||||
bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
|
||||
|
||||
void GetStats(char *buffer, size_t bufsize);
|
||||
void ResetStats();
|
||||
|
@ -252,6 +254,8 @@ private:
|
|||
BinWaitable *waitable_ = nullptr;
|
||||
|
||||
BinDirtyRange pendingWrites_[2]{};
|
||||
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
|
||||
|
||||
bool pendingOverlap_ = false;
|
||||
|
||||
std::unordered_map<const char *, double> flushReasonTimes_;
|
||||
|
@ -262,6 +266,7 @@ private:
|
|||
int enqueues_ = 0;
|
||||
int mostThreads_ = 0;
|
||||
|
||||
void MarkPendingReads(const Rasterizer::RasterizerState &state);
|
||||
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
|
||||
BinCoords Scissor(BinCoords range);
|
||||
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
||||
|
|
|
@ -787,8 +787,8 @@ void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {
|
|||
const uint32_t dstSize = height * dstStride * bpp;
|
||||
|
||||
// Need to flush both source and target, so we overwrite properly.
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", src, srcStride, width * bpp, height);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", dst, dstStride, width * bpp, height);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);
|
||||
|
||||
DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
|
||||
|
||||
|
@ -977,7 +977,7 @@ void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {
|
|||
clutTotalBytes = 1024;
|
||||
|
||||
// Might be copying drawing into the CLUT, so flush.
|
||||
drawEngine_->transformUnit.FlushIfOverlap("loadclut", clutAddr, clutTotalBytes, clutTotalBytes, 1);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("loadclut", false, clutAddr, clutTotalBytes, clutTotalBytes, 1);
|
||||
|
||||
bool changed = false;
|
||||
if (Memory::IsValidAddress(clutAddr)) {
|
||||
|
|
|
@ -797,9 +797,11 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
|
|||
binner_->GetStats(buffer, bufsize);
|
||||
}
|
||||
|
||||
void TransformUnit::FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
if (binner_->HasPendingWrite(addr, stride, w, h))
|
||||
Flush(reason);
|
||||
if (modifying && binner_->HasPendingRead(addr, stride, w, h))
|
||||
Flush(reason);
|
||||
}
|
||||
|
||||
void TransformUnit::NotifyClutUpdate(const void *src) {
|
||||
|
|
|
@ -123,7 +123,7 @@ public:
|
|||
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
|
||||
|
||||
void Flush(const char *reason);
|
||||
void FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
|
||||
void FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
|
||||
void NotifyClutUpdate(const void *src);
|
||||
|
||||
void GetStats(char *buffer, size_t bufsize);
|
||||
|
|
Loading…
Add table
Reference in a new issue