diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index 294261131a..1f225c990a 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -165,23 +165,6 @@ void BinManager::UpdateState() { ComputeRasterizerState(&states_[stateIndex_]); states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable; - DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1()); - DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2()); - ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0); - ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0); - - scissor_.x1 = screenScissorTL.x; - scissor_.y1 = screenScissorTL.y; - scissor_.x2 = screenScissorBR.x + 15; - scissor_.y2 = screenScissorBR.y + 15; - - // Our bin sizes are based on offset, so if that changes we have to flush. - if (queueOffsetX_ != gstate.getOffsetX16() || queueOffsetY_ != gstate.getOffsetY16()) { - Flush("offset"); - queueOffsetX_ = gstate.getOffsetX16(); - queueOffsetY_ = gstate.getOffsetY16(); - } - if (lastFlipstats_ != gpuStats.numFlips) { lastFlipstats_ = gpuStats.numFlips; ResetStats(); @@ -193,31 +176,56 @@ void BinManager::UpdateState() { if (HasTextureWrite(state)) Flush("tex"); - // Okay, now update what's pending. - constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000; - const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2; - pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR); - if (state.pixelID.depthWrite) - pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR); + if (dirty_ & SoftDirty::BINNER_RANGE) { + DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1()); + DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2()); + ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0); + ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0); - // Disallow threads when rendering to the target, even offset. - bool selfRender = HasTextureWrite(state); - int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads(); - if (newMaxTasks > MAX_POSSIBLE_TASKS) - newMaxTasks = MAX_POSSIBLE_TASKS; - // We don't want to overlap wrong, so flush any pending. - if (maxTasks_ != newMaxTasks) { - maxTasks_ = newMaxTasks; - Flush("selfrender"); + scissor_.x1 = screenScissorTL.x; + scissor_.y1 = screenScissorTL.y; + scissor_.x2 = screenScissorBR.x + 15; + scissor_.y2 = screenScissorBR.y + 15; + + // Our bin sizes are based on offset, so if that changes we have to flush. + if (queueOffsetX_ != gstate.getOffsetX16() || queueOffsetY_ != gstate.getOffsetY16()) { + Flush("offset"); + queueOffsetX_ = gstate.getOffsetX16(); + queueOffsetY_ = gstate.getOffsetY16(); + } + + // Okay, now update what's pending. + constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000; + const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2; + pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR); + if (state.pixelID.depthWrite) + pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR); + + dirty_ &= ~SoftDirty::BINNER_RANGE; } - // Lastly, we have to check if we're newly writing depth we were texturing before. - // This happens in Call of Duty (depth clear after depth texture), for example. - if (!hadDepth && state.pixelID.depthWrite) { - for (size_t i = 0; i < states_.Size(); ++i) { - if (HasTextureWrite(states_.Peek(i))) - Flush("selfdepth"); + if (dirty_ & SoftDirty::BINNER_OVERLAP) { + // Disallow threads when rendering to the target, even offset. + bool selfRender = HasTextureWrite(state); + int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads(); + if (newMaxTasks > MAX_POSSIBLE_TASKS) + newMaxTasks = MAX_POSSIBLE_TASKS; + // We don't want to overlap wrong, so flush any pending. + if (maxTasks_ != newMaxTasks) { + maxTasks_ = newMaxTasks; + Flush("selfrender"); } + + // Lastly, we have to check if we're newly writing depth we were texturing before. + // This happens in Call of Duty (depth clear after depth texture), for example. + if (!hadDepth && state.pixelID.depthWrite) { + for (size_t i = 0; i < states_.Size(); ++i) { + if (HasTextureWrite(states_.Peek(i))) { + Flush("selfdepth"); + } + } + } + dirty_ &= ~SoftDirty::BINNER_OVERLAP; } } @@ -435,12 +443,13 @@ void BinManager::Flush(const char *reason) { queueRange_.y1 = 0x7FFFFFFF; queueRange_.x2 = 0; queueRange_.y2 = 0; - queueOffsetX_ = -1; - queueOffsetY_ = -1; for (auto &pending : pendingWrites_) pending.base = 0; + // We'll need to set the pending writes again, since we just flushed it. + dirty_ |= SoftDirty::BINNER_RANGE; + if (coreCollectDebugStats) { double et = time_now_d(); flushReasonTimes_[reason] += et - st; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index bb5ac10bad..fa658d53e7 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -106,8 +106,8 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = { { GE_CMD_FOG1, 0, SoftDirty::TRANSFORM_FOG }, { GE_CMD_FOG2, 0, SoftDirty::TRANSFORM_FOG }, - { GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED }, - { GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC }, + { GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC | SoftDirty::BINNER_OVERLAP }, { GE_CMD_FOGENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED | SoftDirty::TRANSFORM_BASIC | SoftDirty::TRANSFORM_FOG | SoftDirty::TRANSFORM_MATRIX }, { GE_CMD_TEXMODE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX }, // Currently this doesn't affect any state, but maybe it should. @@ -155,8 +155,8 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = { { GE_CMD_MASKRGB, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_WRITEMASK }, { GE_CMD_MASKALPHA, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_WRITEMASK }, { GE_CMD_ZTEST, 0, SoftDirty::PIXEL_BASIC }, - { GE_CMD_ZTESTENABLE, 0, SoftDirty::PIXEL_BASIC }, - { GE_CMD_ZWRITEDISABLE, 0, SoftDirty::PIXEL_BASIC }, + { GE_CMD_ZTESTENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_ZWRITEDISABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP }, { GE_CMD_LOGICOP, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED }, { GE_CMD_LOGICOPENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED }, @@ -168,33 +168,33 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = { { GE_CMD_TEXOFFSETU }, { GE_CMD_TEXOFFSETV }, - { GE_CMD_TEXSIZE0, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE1, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE2, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE3, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE4, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE5, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE6, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXSIZE7, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXFORMAT, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST }, + { GE_CMD_TEXSIZE0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXSIZE7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXFORMAT, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, { GE_CMD_TEXLEVEL, 0, SoftDirty::RAST_TEX }, { GE_CMD_TEXLODSLOPE, 0, SoftDirty::RAST_TEX }, - { GE_CMD_TEXADDR0, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR1, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR2, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR3, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR4, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR5, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR6, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXADDR7, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH0, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH1, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH2, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH3, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH4, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH5, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH6, 0, SoftDirty::SAMPLER_TEXLIST }, - { GE_CMD_TEXBUFWIDTH7, 0, SoftDirty::SAMPLER_TEXLIST }, + { GE_CMD_TEXADDR0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXADDR7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, + { GE_CMD_TEXBUFWIDTH7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP }, { GE_CMD_CLUTADDR }, { GE_CMD_CLUTADDRUPPER }, diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index bec67ae1fe..703e299e1d 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -84,6 +84,7 @@ enum class SoftDirty : uint64_t { TRANSFORM_FOG = 1ULL << 20, BINNER_RANGE = 1ULL << 21, + BINNER_OVERLAP = 1ULL << 22, }; static inline SoftDirty operator |(const SoftDirty &lhs, const SoftDirty &rhs) { return SoftDirty((uint64_t)lhs | (uint64_t)rhs);