Merge pull request #15873 from unknownbrackets/softgpu-xfer-hazard

softgpu: Flush on transfer to pending tex read
This commit is contained in:
Henrik Rydgård 2022-08-21 10:04:27 +02:00 committed by GitHub
commit 5f30c88e38
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 69 additions and 8 deletions

View file

@ -210,6 +210,9 @@ void BinManager::UpdateState() {
}
if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
// This is a good place to record any dependencies for block transfer overlap.
MarkPendingReads(state);
// Disallow threads when rendering to the target, even offset.
bool selfRender = HasTextureWrite(state);
int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
@ -251,6 +254,34 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
return false;
}
// Records the memory footprint of each texture level the given draw state samples from,
// so that a later modification of that memory can be checked against reads still pending.
void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
	// Nothing to record when texturing is disabled.
	if (!state.enableTextures)
		return;

	const uint8_t bitsPerTexel = textureBitsPerPixel[state.samplerID.texfmt];
	for (int level = 0; level <= state.maxTexLevel; ++level) {
		// Convert the texel-based stride and width of this level into bytes.
		const uint32_t rowPitch = (state.texbufw[level] * bitsPerTexel) / 8;
		const uint32_t rowBytes = (state.samplerID.cached.sizes[level].w * bitsPerTexel) / 8;
		const uint32_t rows = state.samplerID.cached.sizes[level].h;

		auto found = pendingReads_.find(state.texaddr[level]);
		if (found == pendingReads_.end()) {
			// No read recorded at this address yet: start a new entry.
			auto &fresh = pendingReads_[state.texaddr[level]];
			fresh.base = state.texaddr[level];
			fresh.strideBytes = rowPitch;
			fresh.widthBytes = rowBytes;
			fresh.height = rows;
			continue;
		}

		// Already tracked: only grow the entry when this read reaches further than the
		// recorded one, measured from the base through the end of the last row.
		auto &known = found->second;
		const uint32_t incomingSpan = rowPitch * (rows - 1) + rowBytes;
		const uint32_t knownSpan = known.strideBytes * (known.height - 1) + known.widthBytes;
		if (knownSpan < incomingSpan) {
			known.strideBytes = std::max(known.strideBytes, rowPitch);
			known.widthBytes = std::max(known.widthBytes, rowBytes);
			known.height = std::max(known.height, rows);
		}
	}
}
inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, DrawingCoords &tl, DrawingCoords &br) {
const uint32_t w = br.x - tl.x + 1;
const uint32_t h = br.y - tl.y + 1;
@ -465,9 +496,10 @@ void BinManager::Flush(const char *reason) {
for (auto &pending : pendingWrites_)
pending.base = 0;
pendingOverlap_ = false;
pendingReads_.clear();
// We'll need to set the pending writes again, since we just flushed it.
dirty_ |= SoftDirty::BINNER_RANGE;
// We'll need to set the pending writes and reads again, since we just flushed it.
dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;
if (coreCollectDebugStats) {
double et = time_now_d();
@ -486,7 +518,7 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
// Ignore mirrors for overlap detection.
start &= 0x0FFFFFFF & ~0x00600000;
uint32_t size = stride * h;
uint32_t size = stride * (h - 1) + w;
for (const auto &range : pendingWrites_) {
if (range.base == 0 || range.strideBytes == 0)
continue;
@ -512,6 +544,28 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
return false;
}
// Checks whether a memory range overlaps any texture read recorded in pendingReads_.
//
// start:  address of the first byte of the range (mirror bits are stripped below.)
// stride: distance between the starts of consecutive rows.
//         NOTE(review): used here in byte arithmetic - confirm callers pass bytes.
// w:      width of one row, in the same units as stride.
// h:      number of rows.
//
// Returns true if any recorded read may overlap the range, false otherwise.
bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
	if (Memory::IsVRAMAddress(start)) {
		// Ignore VRAM mirrors.
		start &= 0x0FFFFFFF & ~0x00600000;
	} else {
		// Ignore only regular RAM mirrors.
		start &= 0x3FFFFFFF;
	}

	// Extent of the checked range, from its first byte through the end of its last row.
	uint32_t size = stride * (h - 1) + w;
	for (const auto &pair : pendingReads_) {
		const auto &range = pair.second;

		// Stride gaps are uncommon with reads, so don't bother: treat each read as one
		// contiguous span.  A row may be wider than the stride (or the stride may be zero),
		// in which case the last row ends beyond height * stride, so take whichever end is
		// further to avoid missing an overlap with the tail of the texture.
		const uint32_t fullRowsBytes = range.height * range.strideBytes;
		const uint32_t lastRowEndBytes = range.height == 0 ? 0 : range.strideBytes * (range.height - 1) + range.widthBytes;
		const uint32_t rangeEnd = range.base + std::max(fullRowsBytes, lastRowEndBytes);

		if (start >= rangeEnd || start + size <= range.base)
			continue;

		return true;
	}

	return false;
}
void BinManager::GetStats(char *buffer, size_t bufsize) {
double allTotal = 0.0;
double slowestTotalTime = 0.0;

View file

@ -198,6 +198,8 @@ public:
void Drain();
void Flush(const char *reason);
bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
// Assumes you've also checked for a write (writes are partial so are automatically reads.)
bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
void GetStats(char *buffer, size_t bufsize);
void ResetStats();
@ -252,6 +254,8 @@ private:
BinWaitable *waitable_ = nullptr;
BinDirtyRange pendingWrites_[2]{};
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
bool pendingOverlap_ = false;
std::unordered_map<const char *, double> flushReasonTimes_;
@ -262,6 +266,7 @@ private:
int enqueues_ = 0;
int mostThreads_ = 0;
void MarkPendingReads(const Rasterizer::RasterizerState &state);
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
BinCoords Scissor(BinCoords range);
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);

View file

@ -787,8 +787,8 @@ void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {
const uint32_t dstSize = height * dstStride * bpp;
// Need to flush both source and target, so we overwrite properly.
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", src, srcStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", dst, dstStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);
DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
@ -977,7 +977,7 @@ void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {
clutTotalBytes = 1024;
// Might be copying drawing into the CLUT, so flush.
drawEngine_->transformUnit.FlushIfOverlap("loadclut", clutAddr, clutTotalBytes, clutTotalBytes, 1);
drawEngine_->transformUnit.FlushIfOverlap("loadclut", false, clutAddr, clutTotalBytes, clutTotalBytes, 1);
bool changed = false;
if (Memory::IsValidAddress(clutAddr)) {

View file

@ -797,9 +797,11 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
binner_->GetStats(buffer, bufsize);
}
void TransformUnit::FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
// Flushes queued drawing when the given memory range conflicts with it.
// A pending write to the range always triggers a flush; a pending read only matters
// when the caller is about to modify the range (modifying == true.)
void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
	const bool writeHazard = binner_->HasPendingWrite(addr, stride, w, h);
	if (writeHazard)
		Flush(reason);

	// Reading memory that is only being read by pending draws is harmless.
	if (!modifying)
		return;

	const bool readHazard = binner_->HasPendingRead(addr, stride, w, h);
	if (readHazard)
		Flush(reason);
}
void TransformUnit::NotifyClutUpdate(const void *src) {

View file

@ -123,7 +123,7 @@ public:
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
void Flush(const char *reason);
void FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
void FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
void NotifyClutUpdate(const void *src);
void GetStats(char *buffer, size_t bufsize);