mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #19776 from hrydgard/depth-draw-queue
Implement queueing of depth raster draws
This commit is contained in:
commit
0b82405889
10 changed files with 147 additions and 79 deletions
|
@ -103,25 +103,18 @@ enum class TriangleResult {
|
|||
TooSmall,
|
||||
};
|
||||
|
||||
// Threshold that DepthRasterTriangle compares its computed triArea against; triangles
// below it are rejected with TriangleResult::TooSmall (this also covers zero area).
// NOTE(review): this diff view shows both the old and the new spelling of the constant.
// The new name suggests triArea holds twice the triangle's area - confirm against the setup code.
constexpr int MIN_TRI_AREA = 10;
|
||||
constexpr int MIN_TWICE_TRI_AREA = 10;
|
||||
|
||||
// Adapted from Intel's depth rasterizer example.
|
||||
// Started with the scalar version, will SIMD-ify later.
|
||||
// x1/y1 etc are the scissor rect.
|
||||
template<ZCompareMode compareMode>
|
||||
TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, DepthScissor scissor, const int *tx, const int *ty, const float *tz) {
|
||||
const int tileStartX = scissor.x1;
|
||||
const int tileEndX = scissor.x2;
|
||||
|
||||
const int tileStartY = scissor.y1;
|
||||
const int tileEndY = scissor.y2;
|
||||
|
||||
// BEGIN triangle setup. This should be done SIMD, four triangles at a time.
|
||||
// Due to the many multiplications, we might want to do it in floating point as 32-bit integer muls
|
||||
// are slow on SSE2.
|
||||
|
||||
// NOTE: Triangles are stored in groups of 4.
|
||||
|
||||
int v0x = tx[0];
|
||||
int v0y = ty[0];
|
||||
int v1x = tx[4];
|
||||
|
@ -131,10 +124,10 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, DepthScissor
|
|||
|
||||
// use fixed-point only for X and Y. Avoid work for Z and W.
|
||||
// We use 4x1 tiles for simplicity.
|
||||
int minX = std::max(std::min(std::min(v0x, v1x), v2x), tileStartX) & ~3;
|
||||
int maxX = std::min(std::max(std::max(v0x, v1x), v2x) + 3, tileEndX) & ~3;
|
||||
int minY = std::max(std::min(std::min(v0y, v1y), v2y), tileStartY);
|
||||
int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY);
|
||||
int minX = std::max(std::min(std::min(v0x, v1x), v2x), (int)scissor.x1) & ~3;
|
||||
int maxX = std::min(std::max(std::max(v0x, v1x), v2x) + 3, (int)scissor.x2) & ~3;
|
||||
int minY = std::max(std::min(std::min(v0y, v1y), v2y), (int)scissor.y1);
|
||||
int maxY = std::min(std::max(std::max(v0y, v1y), v2y), (int)scissor.y2);
|
||||
if (maxX == minX || maxY == minY) {
|
||||
// No pixels, or outside screen.
|
||||
return TriangleResult::NoPixels;
|
||||
|
@ -145,7 +138,7 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, DepthScissor
|
|||
if (triArea < 0) {
|
||||
return TriangleResult::Backface;
|
||||
}
|
||||
if (triArea < MIN_TRI_AREA) {
|
||||
if (triArea < MIN_TWICE_TRI_AREA) {
|
||||
return TriangleResult::TooSmall; // Or zero area.
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ struct DepthScreenVertex {
|
|||
#endif
|
||||
|
||||
// We only need to support these three modes.
|
||||
enum class ZCompareMode {
|
||||
enum class ZCompareMode : u8 {
|
||||
Greater, // Most common
|
||||
Less, // Less common
|
||||
Always, // Mostly used for clears
|
||||
|
@ -29,13 +29,14 @@ struct DepthScissor {
|
|||
};
|
||||
|
||||
// Describes one depth raster draw. Instances are filled in by CalculateDepthDraw, appended to
// DrawEngineCommon::depthDraws_, and later consumed one by one in FlushQueuedDepth.
// NOTE(review): this diff view lists both sides of the change without +/- markers, so
// `cullMode` appears twice (u8 and int) and both transformedStartIndex and vertexOffset
// are shown - presumably one of each pair is the removed line; confirm against the real header.
struct DepthDraw {
|
||||
// Depth buffer address, captured as gstate.getDepthBufRawAddress() | 0x04000000 when the draw is set up.
u32 depthAddr;
|
||||
// Depth buffer stride, captured from gstate.DepthBufStride() when the draw is set up.
u16 depthStride;
|
||||
// Cull mode, captured from gstate.getCullMode().
u8 cullMode;
|
||||
// Primitive type; FlushQueuedDepth only handles GE_PRIM_RECTANGLES and GE_PRIM_TRIANGLES.
GEPrimitiveType prim;
|
||||
// Depth comparison to apply for this draw.
ZCompareMode compareMode;
|
||||
// Captured from gstate.isCullEnabled().
bool cullEnabled;
|
||||
int cullMode;
|
||||
// Scissor rectangle used to clamp the rasterized bounding box.
DepthScissor scissor;
|
||||
// presumably true for through-mode (pre-transformed) vertices - not shown in this view, confirm.
bool through;
|
||||
int transformedStartIndex;
|
||||
// Index of this draw's first vertex in depthTransformed_ (in vertices; 4 floats per vertex).
int vertexOffset;
|
||||
// Index of this draw's first entry in depthIndices_.
int indexOffset;
|
||||
// Count passed to CalculateDepthDraw; it is also the number of 16-bit indices copied for this draw.
int vertexCount;
|
||||
};
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
enum {
|
||||
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex),
|
||||
DEPTH_TRANSFORMED_SIZE = VERTEX_BUFFER_MAX * 4,
|
||||
DEPTH_TRANSFORMED_SIZE = VERTEX_BUFFER_MAX * 4 * sizeof(float),
|
||||
DEPTH_SCREENVERTS_COMPONENT_COUNT = VERTEX_BUFFER_MAX,
|
||||
DEPTH_SCREENVERTS_COMPONENT_SIZE = DEPTH_SCREENVERTS_COMPONENT_COUNT * sizeof(int) + 384,
|
||||
DEPTH_SCREENVERTS_SIZE = DEPTH_SCREENVERTS_COMPONENT_SIZE * 3,
|
||||
|
@ -67,6 +67,9 @@ DrawEngineCommon::DrawEngineCommon() : decoderMap_(32) {
|
|||
case DepthRasterMode::OFF:
|
||||
useDepthRaster_ = false;
|
||||
}
|
||||
if (useDepthRaster_) {
|
||||
depthDraws_.reserve(256);
|
||||
}
|
||||
}
|
||||
|
||||
DrawEngineCommon::~DrawEngineCommon() {
|
||||
|
@ -803,7 +806,7 @@ void DrawEngineCommon::DecodeVerts(VertexDecoder *dec, u8 *dest) {
|
|||
int i = decodeVertsCounter_;
|
||||
int stride = (int)dec->GetDecVtxFmt().stride;
|
||||
for (; i < numDrawVerts_; i++) {
|
||||
DeferredVerts &dv = drawVerts_[i];
|
||||
const DeferredVerts &dv = drawVerts_[i];
|
||||
|
||||
int indexLowerBound = dv.indexLowerBound;
|
||||
drawVertexOffsets_[i] = numDecodedVerts_ - indexLowerBound;
|
||||
|
@ -932,7 +935,7 @@ Mat4F32 ComputeFinalProjMatrix() {
|
|||
return m;
|
||||
}
|
||||
|
||||
static bool CalculateDepthDraw(DepthDraw *draw, GEPrimitiveType prim, int vertexCount) {
|
||||
bool DrawEngineCommon::CalculateDepthDraw(DepthDraw *draw, GEPrimitiveType prim, int vertexCount) {
|
||||
switch (prim) {
|
||||
case GE_PRIM_INVALID:
|
||||
case GE_PRIM_KEEP_PREVIOUS:
|
||||
|
@ -978,8 +981,15 @@ static bool CalculateDepthDraw(DepthDraw *draw, GEPrimitiveType prim, int vertex
|
|||
_dbg_assert_(gstate.isDepthWriteEnabled());
|
||||
}
|
||||
|
||||
draw->transformedStartIndex = 0;
|
||||
draw->indexOffset = 0;
|
||||
if (depthVertexCount_ + vertexCount >= DEPTH_INDEXBUFFER_SIZE) {
|
||||
// Can't add more.
|
||||
return false;
|
||||
}
|
||||
|
||||
draw->depthAddr = gstate.getDepthBufRawAddress() | 0x04000000;
|
||||
draw->depthStride = gstate.DepthBufStride();
|
||||
draw->vertexOffset = depthVertexCount_;
|
||||
draw->indexOffset = depthIndexCount_;
|
||||
draw->vertexCount = vertexCount;
|
||||
draw->cullEnabled = gstate.isCullEnabled();
|
||||
draw->cullMode = gstate.getCullMode();
|
||||
|
@ -1010,32 +1020,31 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
|
|||
return;
|
||||
}
|
||||
|
||||
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
|
||||
TimeCollector collectStat(&gpuStats.msPrepareDepth, coreCollectDebugStats);
|
||||
|
||||
// Decode.
|
||||
int numDec = 0;
|
||||
int numDecoded = 0;
|
||||
for (int i = 0; i < numDrawVerts_; i++) {
|
||||
DeferredVerts &dv = drawVerts_[i];
|
||||
|
||||
int indexLowerBound = dv.indexLowerBound;
|
||||
drawVertexOffsets_[i] = numDec - indexLowerBound;
|
||||
|
||||
int indexUpperBound = dv.indexUpperBound;
|
||||
if (indexUpperBound + 1 - indexLowerBound + numDec >= VERTEX_BUFFER_MAX) {
|
||||
const DeferredVerts &dv = drawVerts_[i];
|
||||
if (dv.indexUpperBound + 1 - dv.indexLowerBound + numDecoded >= VERTEX_BUFFER_MAX) {
|
||||
// Hit our limit! Stop decoding in this draw.
|
||||
break;
|
||||
}
|
||||
|
||||
// Decode the verts (and at the same time apply morphing/skinning). Simple.
|
||||
DecodeAndTransformForDepthRaster(depthTransformed_ + numDec * 4, worldviewproj, dv.verts, indexLowerBound, indexUpperBound, dec, vertTypeID);
|
||||
numDec += indexUpperBound - indexLowerBound + 1;
|
||||
DecodeAndTransformForDepthRaster(depthTransformed_ + (draw.vertexOffset + numDecoded) * 4, worldviewproj, dv.verts, dv.indexLowerBound, dv.indexUpperBound, dec, vertTypeID);
|
||||
numDecoded += dv.indexUpperBound - dv.indexLowerBound + 1;
|
||||
}
|
||||
|
||||
// Copy indices.
|
||||
memcpy(depthIndices_, decIndex_, sizeof(uint16_t) * vertexCount);
|
||||
memcpy(depthIndices_ + draw.indexOffset, decIndex_, sizeof(uint16_t) * vertexCount);
|
||||
|
||||
// FUTURE SPLIT --- The above will always run on the main thread. The below can be split across workers.
|
||||
FlushDepthDraw(draw);
|
||||
// Commit
|
||||
depthIndexCount_ += vertexCount;
|
||||
depthVertexCount_ += numDecoded;
|
||||
|
||||
depthDraws_.push_back(draw);
|
||||
|
||||
// FlushQueuedDepth();
|
||||
}
|
||||
|
||||
void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *inVerts, int numDecoded, VertexDecoder *dec, int vertexCount) {
|
||||
|
@ -1048,45 +1057,63 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i
|
|||
return;
|
||||
}
|
||||
|
||||
TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats);
|
||||
TimeCollector collectStat(&gpuStats.msPrepareDepth, coreCollectDebugStats);
|
||||
|
||||
_dbg_assert_(prim != GE_PRIM_TRIANGLE_STRIP && prim != GE_PRIM_TRIANGLE_FAN);
|
||||
|
||||
if (dec->throughmode) {
|
||||
ConvertPredecodedThroughForDepthRaster(depthTransformed_, decoded_, dec, numDecoded);
|
||||
ConvertPredecodedThroughForDepthRaster(depthTransformed_ + 4 * draw.vertexOffset, decoded_, dec, numDecoded);
|
||||
} else {
|
||||
if (dec->VertexType() & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)) {
|
||||
return;
|
||||
}
|
||||
float worldviewproj[16];
|
||||
ComputeFinalProjMatrix().Store(worldviewproj);
|
||||
TransformPredecodedForDepthRaster(depthTransformed_, worldviewproj, decoded_, dec, numDecoded);
|
||||
TransformPredecodedForDepthRaster(depthTransformed_ + 4 * draw.vertexOffset, worldviewproj, decoded_, dec, numDecoded);
|
||||
}
|
||||
|
||||
// Copy indices.
|
||||
memcpy(depthIndices_, decIndex_, sizeof(uint16_t) * vertexCount);
|
||||
memcpy(depthIndices_ + draw.indexOffset, decIndex_, sizeof(uint16_t) * vertexCount);
|
||||
|
||||
// FUTURE SPLIT --- The above will always run on the main thread. The below can be split across workers.
|
||||
FlushDepthDraw(draw);
|
||||
// Commit
|
||||
depthIndexCount_ += vertexCount;
|
||||
depthVertexCount_ += numDecoded;
|
||||
|
||||
depthDraws_.push_back(draw);
|
||||
|
||||
// FlushQueuedDepth();
|
||||
}
|
||||
|
||||
void DrawEngineCommon::FlushDepthDraw(const DepthDraw &draw) {
|
||||
int *tx = depthScreenVerts_;
|
||||
int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT;
|
||||
float *tz = (float *)(depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2);
|
||||
void DrawEngineCommon::FlushQueuedDepth() {
|
||||
TimeCollector collectStat(&gpuStats.msRasterizeDepth, coreCollectDebugStats);
|
||||
|
||||
int outVertCount = 0;
|
||||
switch (draw.prim) {
|
||||
case GE_PRIM_RECTANGLES:
|
||||
outVertCount = DepthRasterClipIndexedRectangles(tx, ty, tz, depthTransformed_, depthIndices_ + draw.indexOffset, draw);
|
||||
break;
|
||||
case GE_PRIM_TRIANGLES:
|
||||
outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, depthTransformed_, depthIndices_ + draw.indexOffset, draw);
|
||||
break;
|
||||
default:
|
||||
_dbg_assert_(false);
|
||||
break;
|
||||
for (const auto &draw : depthDraws_) {
|
||||
int *tx = depthScreenVerts_;
|
||||
int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT;
|
||||
float *tz = (float *)(depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2);
|
||||
|
||||
int outVertCount = 0;
|
||||
|
||||
const float *vertices = depthTransformed_ + 4 * draw.vertexOffset;
|
||||
const uint16_t *indices = depthIndices_ + draw.indexOffset;
|
||||
|
||||
switch (draw.prim) {
|
||||
case GE_PRIM_RECTANGLES:
|
||||
outVertCount = DepthRasterClipIndexedRectangles(tx, ty, tz, vertices, indices, draw);
|
||||
break;
|
||||
case GE_PRIM_TRIANGLES:
|
||||
outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, vertices, indices, draw);
|
||||
break;
|
||||
default:
|
||||
_dbg_assert_(false);
|
||||
break;
|
||||
}
|
||||
// TODO: Could potentially split into tasks here!
|
||||
DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(draw.depthAddr), draw.depthStride, tx, ty, tz, outVertCount, draw);
|
||||
}
|
||||
DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(gstate.getDepthBufRawAddress() | 0x04000000), gstate.DepthBufStride(),
|
||||
tx, ty, tz, outVertCount, draw);
|
||||
|
||||
// Reset queue
|
||||
depthIndexCount_ = 0;
|
||||
depthVertexCount_ = 0;
|
||||
depthDraws_.clear();
|
||||
}
|
||||
|
|
|
@ -164,6 +164,8 @@ public:
|
|||
return decoded_ + 12 * 65536;
|
||||
}
|
||||
|
||||
void FlushQueuedDepth();
|
||||
|
||||
protected:
|
||||
virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
|
||||
void UpdatePlanes();
|
||||
|
@ -177,7 +179,7 @@ protected:
|
|||
|
||||
void DepthRasterTransform(GEPrimitiveType prim, VertexDecoder *dec, uint32_t vertTypeID, int vertexCount);
|
||||
void DepthRasterPredecoded(GEPrimitiveType prim, const void *inVerts, int numDecoded, VertexDecoder *dec, int vertexCount);
|
||||
void FlushDepthDraw(const DepthDraw &draw);
|
||||
bool CalculateDepthDraw(DepthDraw *draw, GEPrimitiveType prim, int vertexCount);
|
||||
|
||||
static inline int IndexSize(u32 vtype) {
|
||||
const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);
|
||||
|
@ -360,4 +362,9 @@ protected:
|
|||
float *depthTransformed_ = nullptr;
|
||||
int *depthScreenVerts_ = nullptr;
|
||||
uint16_t *depthIndices_ = nullptr;
|
||||
|
||||
// Queue
|
||||
int depthVertexCount_ = 0;
|
||||
int depthIndexCount_ = 0;
|
||||
std::vector<DepthDraw> depthDraws_;
|
||||
};
|
||||
|
|
|
@ -3240,6 +3240,7 @@ void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb,
|
|||
}
|
||||
|
||||
void FramebufferManagerCommon::FlushBeforeCopy() {
|
||||
drawEngine_->FlushQueuedDepth();
|
||||
// Flush anything not yet drawn before blitting, downloading, or uploading.
|
||||
// This might be a stalled list, or unflushed before a block transfer, etc.
|
||||
// Only bother if any draws are pending.
|
||||
|
@ -3247,7 +3248,8 @@ void FramebufferManagerCommon::FlushBeforeCopy() {
|
|||
// TODO: It's really bad that we are calling SetRenderFramebuffer here with
|
||||
// all the irrelevant state checking it'll use to decide what to do. Should
|
||||
// do something more focused here.
|
||||
SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
bool changed;
|
||||
SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
|
||||
drawEngine_->Flush();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -306,13 +306,14 @@ public:
|
|||
void DestroyFramebuf(VirtualFramebuffer *v);
|
||||
|
||||
// Out-of-line framebuffer selection. The inline SetRenderFrameBuffer wrapper calls this when
// it cannot simply reuse currentRenderVfb_, and asserts afterwards that the returned pointer
// is non-null and that currentRenderVfb_ has been set.
VirtualFramebuffer *DoSetRenderFrameBuffer(FramebufferHeuristicParams &params, u32 skipDrawReason);
|
||||
VirtualFramebuffer *SetRenderFrameBuffer(bool framebufChanged, int skipDrawReason) {
|
||||
VirtualFramebuffer *SetRenderFrameBuffer(bool framebufChanged, int skipDrawReason, bool *changed) {
|
||||
// Inlining this part since it's so frequent.
|
||||
if (!framebufChanged && currentRenderVfb_) {
|
||||
currentRenderVfb_->last_frame_render = gpuStats.numFlips;
|
||||
currentRenderVfb_->dirtyAfterDisplay = true;
|
||||
if (!skipDrawReason)
|
||||
currentRenderVfb_->reallyDirtyAfterDisplay = true;
|
||||
*changed = false;
|
||||
return currentRenderVfb_;
|
||||
} else {
|
||||
// This is so that we will be able to drive DoSetRenderFramebuffer with inputs
|
||||
|
@ -322,6 +323,7 @@ public:
|
|||
VirtualFramebuffer *vfb = DoSetRenderFrameBuffer(inputs, skipDrawReason);
|
||||
_dbg_assert_msg_(vfb, "DoSetRenderFramebuffer must return a valid framebuffer.");
|
||||
_dbg_assert_msg_(currentRenderVfb_, "DoSetRenderFramebuffer must set a valid framebuffer.");
|
||||
*changed = true;
|
||||
return vfb;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -108,7 +108,8 @@ struct GPUStatistics {
|
|||
numCachedReplacedTextures = 0;
|
||||
numClutTextures = 0;
|
||||
msProcessingDisplayLists = 0;
|
||||
msRasterizingDepth = 0.0f;
|
||||
msPrepareDepth = 0.0f;
|
||||
msRasterizeDepth = 0.0f;
|
||||
numDepthRasterPrims = 0;
|
||||
numDepthRasterBackface = 0;
|
||||
numDepthRasterNoPixels = 0;
|
||||
|
@ -153,7 +154,8 @@ struct GPUStatistics {
|
|||
int numCachedReplacedTextures;
|
||||
int numClutTextures;
|
||||
double msProcessingDisplayLists;
|
||||
double msRasterizingDepth;
|
||||
double msPrepareDepth;
|
||||
double msRasterizeDepth;
|
||||
int vertexGPUCycles;
|
||||
int otherGPUCycles;
|
||||
int numDepthRasterPrims;
|
||||
|
|
|
@ -987,8 +987,10 @@ void GPUCommon::Execute_Ret(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
void GPUCommon::Execute_End(u32 op, u32 diff) {
|
||||
if (flushOnParams_)
|
||||
if (flushOnParams_) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
Flush();
|
||||
}
|
||||
|
||||
const u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
|
||||
UpdatePC(currentList->pc, currentList->pc);
|
||||
|
@ -1311,8 +1313,10 @@ void GPUCommon::FlushImm() {
|
|||
gstate_c.UpdateUVScaleOffset();
|
||||
|
||||
VirtualFramebuffer *vfb = nullptr;
|
||||
if (framebufferManager_)
|
||||
vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
if (framebufferManager_) {
|
||||
bool changed;
|
||||
vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
|
||||
}
|
||||
if (vfb) {
|
||||
CheckDepthUsage(vfb);
|
||||
}
|
||||
|
@ -1378,8 +1382,9 @@ void GPUCommon::FastLoadBoneMatrix(u32 target) {
|
|||
}
|
||||
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
if (flushOnParams_)
|
||||
if (flushOnParams_) {
|
||||
Flush();
|
||||
}
|
||||
gstate_c.Dirty(uniformsToDirty);
|
||||
} else {
|
||||
gstate_c.deferredVertTypeDirty |= uniformsToDirty;
|
||||
|
|
|
@ -531,6 +531,7 @@ void GPUCommonHW::PreExecuteOp(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
void GPUCommonHW::CopyDisplayToOutput(bool reallyDirty) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
// Flush anything left over.
|
||||
drawEngineCommon_->Flush();
|
||||
|
||||
|
@ -949,11 +950,16 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
// This also makes skipping drawing very effective.
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
bool changed;
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
|
||||
if (blueToAlpha) {
|
||||
vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA;
|
||||
}
|
||||
|
||||
if (changed) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
}
|
||||
|
||||
if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
|
||||
vertexCost_ = EstimatePerVertexCost();
|
||||
}
|
||||
|
@ -1273,7 +1279,11 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
|
|||
gstate_c.framebufFormat = gstate.FrameBufFormat();
|
||||
|
||||
// This also makes skipping drawing very effective.
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
bool changed;
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
|
||||
if (changed) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
}
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
// TODO: Should this eat some cycles? Probably yes. Not sure if important.
|
||||
return;
|
||||
|
@ -1282,7 +1292,7 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
|
|||
CheckDepthUsage(vfb);
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1290,7 +1300,7 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
|
|||
const void *indices = NULL;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
|
||||
ERROR_LOG_REPORT(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
ERROR_LOG(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
return;
|
||||
}
|
||||
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
|
@ -1345,7 +1355,11 @@ void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
|
|||
gstate_c.framebufFormat = gstate.FrameBufFormat();
|
||||
|
||||
// This also makes skipping drawing very effective.
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
bool changed;
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
|
||||
if (changed) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
}
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
// TODO: Should this eat some cycles? Probably yes. Not sure if important.
|
||||
return;
|
||||
|
@ -1354,7 +1368,7 @@ void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
|
|||
CheckDepthUsage(vfb);
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1362,14 +1376,14 @@ void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
|
|||
const void *indices = NULL;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
|
||||
ERROR_LOG_REPORT(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
ERROR_LOG(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
return;
|
||||
}
|
||||
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
if (vertTypeIsSkinningEnabled(gstate.vertType)) {
|
||||
DEBUG_LOG_REPORT(Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
|
||||
WARN_LOG_ONCE(unusualcurve, Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
|
||||
}
|
||||
|
||||
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
|
||||
|
@ -1415,6 +1429,7 @@ void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
void GPUCommonHW::Execute_BlockTransferStart(u32 op, u32 diff) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
Flush();
|
||||
|
||||
PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading.
|
||||
|
@ -1763,6 +1778,16 @@ void GPUCommonHW::Execute_TexFlush(u32 op, u32 diff) {
|
|||
framebufferManager_->DiscardFramebufferCopy();
|
||||
}
|
||||
|
||||
// Override of DrawSync: runs any queued depth raster draws first, then delegates to the
// base-class GPUCommon::DrawSync and returns its result unchanged.
// NOTE(review): presumably the flush is needed so queued depth writes have reached memory
// before the sync is observed - confirm.
u32 GPUCommonHW::DrawSync(int mode) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
return GPUCommon::DrawSync(mode);
|
||||
}
|
||||
|
||||
// Override of ListSync: runs any queued depth raster draws first, then delegates to the
// base-class GPUCommon::ListSync with the same list id and mode, returning its result unchanged.
int GPUCommonHW::ListSync(int listid, int mode) {
|
||||
drawEngineCommon_->FlushQueuedDepth();
|
||||
return GPUCommon::ListSync(listid, mode);
|
||||
}
|
||||
|
||||
size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
||||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
return snprintf(buffer, size,
|
||||
|
@ -1776,7 +1801,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
"replacer: tracks %d references, %d unique textures\n"
|
||||
"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
|
||||
"GPU cycles: %d (%0.1f per vertex)\n"
|
||||
"Depth raster: %0.2f ms, %d prim, %d nopix, %d small, %d back, %d zcull\n%s",
|
||||
"Z-rast: %0.2f/%0.2f ms, %d prim, %d nopix, %d small, %d back, %d zcull\n%s",
|
||||
gpuStats.msProcessingDisplayLists * 1000.0f,
|
||||
gpuStats.numDrawSyncs,
|
||||
gpuStats.numListSyncs,
|
||||
|
@ -1813,7 +1838,8 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
|
|||
gpuStats.numCopiesForSelfTex,
|
||||
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
|
||||
vertexAverageCycles,
|
||||
gpuStats.msRasterizingDepth * 1000.0,
|
||||
gpuStats.msPrepareDepth * 1000.0,
|
||||
gpuStats.msRasterizeDepth * 1000.0,
|
||||
gpuStats.numDepthRasterPrims,
|
||||
gpuStats.numDepthRasterNoPixels,
|
||||
gpuStats.numDepthRasterTooSmall,
|
||||
|
|
|
@ -42,6 +42,9 @@ public:
|
|||
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
|
||||
void InvalidateCache(u32 addr, int size, GPUInvalidationType type) override;
|
||||
|
||||
u32 DrawSync(int mode) override;
|
||||
int ListSync(int listid, int mode) override;
|
||||
|
||||
bool FramebufferDirty() override;
|
||||
bool FramebufferReallyDirty() override;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue