diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a59e348d4c..7338d29d3e 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -42,6 +42,7 @@ #include "Core/HW/Display.h" #include "Core/MemMapHelpers.h" #include "Core/Util/PPGeDraw.h" +#include "GPU/GPUCommonHW.h" #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/FramebufferManagerCommon.h" #include "GPU/Common/SplineCommon.h" @@ -50,7 +51,7 @@ #include "GPU/Debugger/Record.h" // TODO: Make class member? -GPUCommon::CommandInfo GPUCommon::cmdInfo_[256]; +GPUCommonHW::CommandInfo GPUCommon::cmdInfo_[256]; void GPUCommon::Flush() { drawEngineCommon_->DispatchFlush(); @@ -1380,444 +1381,6 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) { gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE | DIRTY_FOGCOEFENABLE); } -void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) { - if (!gstate_c.usingDepth) { - bool isReadingDepth = false; - bool isClearingDepth = false; - bool isWritingDepth = false; - if (gstate.isModeClear()) { - isClearingDepth = gstate.isClearModeDepthMask(); - isWritingDepth = isClearingDepth; - } else if (gstate.isDepthTestEnabled()) { - isWritingDepth = gstate.isDepthWriteEnabled(); - isReadingDepth = gstate.getDepthTestFunction() > GE_COMP_ALWAYS; - } - - if (isWritingDepth || isReadingDepth) { - gstate_c.usingDepth = true; - gstate_c.clearingDepth = isClearingDepth; - vfb->last_frame_depth_render = gpuStats.numFlips; - if (isWritingDepth) { - vfb->last_frame_depth_updated = gpuStats.numFlips; - } - framebufferManager_->SetDepthFrameBuffer(isClearingDepth); - } - } -} - -void GPUCommon::Execute_Prim(u32 op, u32 diff) { - // This drives all drawing. All other state we just buffer up, then we apply it only - // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. 
- - PROFILE_THIS_SCOPE("execprim"); - - u32 data = op & 0xFFFFFF; - u32 count = data & 0xFFFF; - if (count == 0) - return; - FlushImm(); - - // Upper bits are ignored. - GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7); - SetDrawType(DRAW_PRIM, prim); - - // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. - if (gstate.isAntiAliasEnabled()) { - // Heuristic derived from discussions in #6483 and #12588. - // Discard AA lines in Persona 3 Portable, DOA Paradise and Summon Night 5, while still keeping AA lines in Echochrome. - if ((prim == GE_PRIM_LINE_STRIP || prim == GE_PRIM_LINES) && gstate.getTextureFunction() == GE_TEXFUNC_REPLACE) - return; - } - - // Update cached framebuffer format. - // We store it in the cache so it can be modified for blue-to-alpha, next. - gstate_c.framebufFormat = gstate.FrameBufFormat(); - - if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { - ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); - return; - } - - // See the documentation for gstate_c.blueToAlpha. 
- bool blueToAlpha = false; - if (PSP_CoreParameter().compat.flags().BlueToAlpha) { - if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && !gstate.isLogicOpEnabled()) { - blueToAlpha = true; - gstate_c.framebufFormat = GEBufferFormat::GE_FORMAT_4444; - } - if (blueToAlpha != gstate_c.blueToAlpha) { - gstate_c.blueToAlpha = blueToAlpha; - gstate_c.Dirty(DIRTY_FRAMEBUF | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE); - } - } - - if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin) { - switch (gstate.vertType & 0xFFFFFF) { - case 0x00800102: // through, u16 uv, u16 pos (used for the framebuffer effect in-game) - case 0x0080011c: // through, 8888 color, s16 pos (used for clearing in the margin of the title screen) - case 0x00000183: // float uv, float pos (used for drawing in the margin of the title screen) - // Need to re-check the framebuffer every one of these draws, to update the split if needed. - gstate_c.Dirty(DIRTY_FRAMEBUF); - } - } - - // This also makes skipping drawing very effective. - VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (blueToAlpha) { - vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA; - } - - // Must check this after SetRenderFrameBuffer so we know SKIPDRAW_NON_DISPLAYED_FB. - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - // Rough estimate, not sure what's correct. 
- cyclesExecuted += EstimatePerVertexCost() * count; - if (gstate.isModeClear()) { - gpuStats.numClears++; - } - return; - } - - CheckDepthUsage(vfb); - - const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); - const void *inds = nullptr; - u32 vertexType = gstate.vertType; - if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - u32 indexAddr = gstate_c.indexAddr; - if (!Memory::IsValidAddress(indexAddr)) { - ERROR_LOG(G3D, "Bad index address %08x!", indexAddr); - return; - } - inds = Memory::GetPointerUnchecked(indexAddr); - } - - if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { - vertexCost_ = EstimatePerVertexCost(); - } - - int bytesRead = 0; - UpdateUVScaleOffset(); - - // cull mode - int cullMode = gstate.getCullMode(); - - uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning); - drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead); - // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). - // Some games rely on this, they don't bother reloading VADDR and IADDR. - // The VADDR/IADDR registers are NOT updated. - AdvanceVerts(vertexType, count, bytesRead); - int totalVertCount = count; - - // PRIMs are often followed by more PRIMs. Save some work and submit them immediately. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - const u32_le *stall = currentList->stall ? (const u32_le *)Memory::GetPointerUnchecked(currentList->stall) : 0; - int cmdCount = 0; - - // Optimized submission of sequences of PRIM. Allows us to avoid going through all the mess - // above for each one. This can be expanded to support additional games that intersperse - // PRIM commands with other commands. A special case is Earth Defence Force 2 that changes culling mode - // between each prim, we just change the triangle winding right here to still be able to join draw calls. 
- - uint32_t vtypeCheckMask = ~GE_VTYPE_WEIGHTCOUNT_MASK; - if (!g_Config.bSoftwareSkinning) - vtypeCheckMask = 0xFFFFFFFF; - - if (debugRecording_) - goto bail; - - while (src != stall) { - uint32_t data = *src; - switch (data >> 24) { - case GE_CMD_PRIM: - { - u32 count = data & 0xFFFF; - if (count == 0) { - // Ignore. - break; - } - - GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((data >> 16) & 7); - SetDrawType(DRAW_PRIM, newPrim); - // TODO: more efficient updating of verts/inds - verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); - inds = nullptr; - if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); - } - - drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead); - AdvanceVerts(vertexType, count, bytesRead); - totalVertCount += count; - break; - } - case GE_CMD_VERTEXTYPE: - { - uint32_t diff = data ^ vertexType; - // don't mask upper bits, vertexType is unmasked - if (diff & vtypeCheckMask) { - goto bail; - } else { - vertexType = data; - vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning); - } - break; - } - case GE_CMD_VADDR: - gstate.cmdmem[GE_CMD_VADDR] = data; - gstate_c.vertexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF); - break; - case GE_CMD_IADDR: - gstate.cmdmem[GE_CMD_IADDR] = data; - gstate_c.indexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF); - break; - case GE_CMD_OFFSETADDR: - gstate.cmdmem[GE_CMD_OFFSETADDR] = data; - gstate_c.offsetAddr = data << 8; - break; - case GE_CMD_BASE: - gstate.cmdmem[GE_CMD_BASE] = data; - break; - case GE_CMD_CULLFACEENABLE: - // Earth Defence Force 2 - if (gstate.cmdmem[GE_CMD_CULLFACEENABLE] != data) { - goto bail; - } - break; - case GE_CMD_CULL: - // flip face by indices for triangles - cullMode = data & 1; - break; - case GE_CMD_TEXFLUSH: - case GE_CMD_NOP: - case GE_CMD_NOP_FF: - gstate.cmdmem[data >> 24] = data; - break; - case 
GE_CMD_BONEMATRIXNUMBER: - gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = data; - break; - case GE_CMD_TEXSCALEU: - gstate.cmdmem[GE_CMD_TEXSCALEU] = data; - gstate_c.uv.uScale = getFloat24(data); - break; - case GE_CMD_TEXSCALEV: - gstate.cmdmem[GE_CMD_TEXSCALEV] = data; - gstate_c.uv.vScale = getFloat24(data); - break; - case GE_CMD_TEXOFFSETU: - gstate.cmdmem[GE_CMD_TEXOFFSETU] = data; - gstate_c.uv.uOff = getFloat24(data); - break; - case GE_CMD_TEXOFFSETV: - gstate.cmdmem[GE_CMD_TEXOFFSETV] = data; - gstate_c.uv.vOff = getFloat24(data); - break; - case GE_CMD_TEXLEVEL: - // Same Gran Turismo hack from Execute_TexLevel - if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) { - goto bail; - } - gstate.cmdmem[GE_CMD_TEXLEVEL] = data; - break; - case GE_CMD_CALL: - { - // A bone matrix probably. If not we bail. - const u32 target = gstate_c.getRelativeAddress(data & 0x00FFFFFC); - if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA && - (Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA && - (Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET && - (target > currentList->stall || target + 12 * 4 < currentList->stall) && - (gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) { - FastLoadBoneMatrix(target); - } else { - goto bail; - } - break; - } - - case GE_CMD_TEXBUFWIDTH0: - case GE_CMD_TEXADDR0: - if (data != gstate.cmdmem[data >> 24]) - goto bail; - break; - - default: - // All other commands might need a flush or something, stop this inner loop. - goto bail; - } - cmdCount++; - src++; - } - -bail: - gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType; - // Skip over the commands we just read out manually. - if (cmdCount > 0) { - UpdatePC(currentList->pc, currentList->pc + cmdCount * 4); - currentList->pc += cmdCount * 4; - // flush back cull mode - if (cullMode != gstate.getCullMode()) { - // We rewrote everything to the old cull mode, so flush first. 
- drawEngineCommon_->DispatchFlush(); - - // Now update things for next time. - gstate.cmdmem[GE_CMD_CULL] ^= 1; - gstate_c.Dirty(DIRTY_RASTER_STATE); - } - } - - gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount; - cyclesExecuted += vertexCost_ * totalVertCount; -} - -void GPUCommon::Execute_Bezier(u32 op, u32 diff) { - // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - - gstate_c.framebufFormat = gstate.FrameBufFormat(); - - // This also make skipping drawing very effective. - VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - // TODO: Should this eat some cycles? Probably yes. Not sure if important. - return; - } - - CheckDepthUsage(vfb); - - if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); - return; - } - - const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr); - const void *indices = NULL; - if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - if (!Memory::IsValidAddress(gstate_c.indexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr); - return; - } - indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); - } - - if (vertTypeIsSkinningEnabled(gstate.vertType)) { - DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); - } - - // Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already. 
- if (flushOnParams_) - drawEngineCommon_->DispatchFlush(); - - Spline::BezierSurface surface; - surface.tess_u = gstate.getPatchDivisionU(); - surface.tess_v = gstate.getPatchDivisionV(); - surface.num_points_u = op & 0xFF; - surface.num_points_v = (op >> 8) & 0xFF; - surface.num_patches_u = (surface.num_points_u - 1) / 3; - surface.num_patches_v = (surface.num_points_v - 1) / 3; - surface.primType = gstate.getPatchPrimitiveType(); - surface.patchFacing = gstate.patchfacing & 1; - - SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType)); - - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); - if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { - gstate_c.submitType = SubmitType::HW_BEZIER; - if (gstate_c.spline_num_points_u != surface.num_points_u) { - gstate_c.Dirty(DIRTY_BEZIERSPLINE); - gstate_c.spline_num_points_u = surface.num_points_u; - } - } else { - gstate_c.submitType = SubmitType::BEZIER; - } - - int bytesRead = 0; - UpdateUVScaleOffset(); - drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier"); - - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); - gstate_c.submitType = SubmitType::DRAW; - - // After drawing, we advance pointers - see SubmitPrim which does the same. - int count = surface.num_points_u * surface.num_points_v; - AdvanceVerts(gstate.vertType, count, bytesRead); -} - -void GPUCommon::Execute_Spline(u32 op, u32 diff) { - // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - - gstate_c.framebufFormat = gstate.FrameBufFormat(); - - // This also make skipping drawing very effective. 
- VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { - // TODO: Should this eat some cycles? Probably yes. Not sure if important. - return; - } - - CheckDepthUsage(vfb); - - if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); - return; - } - - const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr); - const void *indices = NULL; - if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - if (!Memory::IsValidAddress(gstate_c.indexAddr)) { - ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr); - return; - } - indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); - } - - if (vertTypeIsSkinningEnabled(gstate.vertType)) { - DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); - } - - // Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already. 
- if (flushOnParams_) - drawEngineCommon_->DispatchFlush(); - - Spline::SplineSurface surface; - surface.tess_u = gstate.getPatchDivisionU(); - surface.tess_v = gstate.getPatchDivisionV(); - surface.type_u = (op >> 16) & 0x3; - surface.type_v = (op >> 18) & 0x3; - surface.num_points_u = op & 0xFF; - surface.num_points_v = (op >> 8) & 0xFF; - surface.num_patches_u = surface.num_points_u - 3; - surface.num_patches_v = surface.num_points_v - 3; - surface.primType = gstate.getPatchPrimitiveType(); - surface.patchFacing = gstate.patchfacing & 1; - - SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType)); - - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); - if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { - gstate_c.submitType = SubmitType::HW_SPLINE; - if (gstate_c.spline_num_points_u != surface.num_points_u) { - gstate_c.Dirty(DIRTY_BEZIERSPLINE); - gstate_c.spline_num_points_u = surface.num_points_u; - } - } else { - gstate_c.submitType = SubmitType::SPLINE; - } - - int bytesRead = 0; - UpdateUVScaleOffset(); - drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline"); - - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); - gstate_c.submitType = SubmitType::DRAW; - - // After drawing, we advance pointers - see SubmitPrim which does the same. - int count = surface.num_points_u * surface.num_points_v; - AdvanceVerts(gstate.vertType, count, bytesRead); -} - void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) { // Just resetting, nothing to check bounds for. const u32 count = op & 0xFFFF; @@ -1872,18 +1435,6 @@ void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) { } } -void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) { - Flush(); - - PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading. 
- - gstate_c.framebufFormat = gstate.FrameBufFormat(); - - // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. - // Can we skip this on SkipDraw? - DoBlockTransfer(gstate_c.skipDrawReason); -} - void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) { if (!currentList) { gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (op & 0xF); diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index eecfaa68da..f234f2faa3 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -155,11 +155,7 @@ public: void Execute_VertexType(u32 op, u32 diff); void Execute_VertexTypeSkinning(u32 op, u32 diff); - void Execute_Prim(u32 op, u32 diff); - void Execute_Bezier(u32 op, u32 diff); - void Execute_Spline(u32 op, u32 diff); void Execute_BoundingBox(u32 op, u32 diff); - void Execute_BlockTransferStart(u32 op, u32 diff); void Execute_LoadClut(u32 op, u32 diff); @@ -415,7 +411,6 @@ protected: std::string reportingFullInfo_; private: - void CheckDepthUsage(VirtualFramebuffer *vfb); void DoExecuteCall(u32 target); void PopDLQueue(); void CheckDrawSync(); @@ -427,10 +422,3 @@ private: int lastVsync_ = -1; }; - -struct CommonCommandTableEntry { - uint8_t cmd; - uint8_t flags; - uint64_t dirty; - GPUCommon::CmdFunc func; -}; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 2f5029d5f7..eb62e1ff60 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -1,3 +1,5 @@ +#include "Common/Profiler/Profiler.h" + #include "Common/GPU/thin3d.h" #include "Common/Serialize/Serializer.h" #include "Common/System/System.h" @@ -6,10 +8,18 @@ #include "Core/Config.h" #include "GPU/GPUCommonHW.h" +#include "GPU/Common/SplineCommon.h" #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/TextureCacheCommon.h" #include "GPU/Common/FramebufferManagerCommon.h" +struct CommonCommandTableEntry { + uint8_t cmd; + uint8_t flags; + uint64_t dirty; + GPUCommonHW::CmdFunc func; +}; + const CommonCommandTableEntry commonCommandTable[] = { // From Common. 
No flushing but definitely need execute. { GE_CMD_OFFSETADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_OffsetAddr }, @@ -21,11 +31,11 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_VADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Vaddr }, { GE_CMD_IADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Iaddr }, { GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE - { GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // Shouldn't need to FLUSHBEFORE. + { GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_BoundingBox }, // Shouldn't need to FLUSHBEFORE. - { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommon::Execute_Prim }, - { GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, - { GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, + { GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Prim }, + { GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Bezier }, + { GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline }, // Changing the vertex type requires us to flush. { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, @@ -274,7 +284,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TRANSFERSRCPOS, 0 }, { GE_CMD_TRANSFERDSTPOS, 0 }, { GE_CMD_TRANSFERSIZE, 0 }, - { GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommon::Execute_BlockTransferStart }, + { GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommonHW::Execute_BlockTransferStart }, // We don't use the dither table. 
{ GE_CMD_DITH0 }, @@ -346,7 +356,7 @@ GPUCommonHW::GPUCommonHW(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPU dupeCheck.insert(cmd); } cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8); - cmdInfo_[cmd].func = commonCommandTable[i].func; + cmdInfo_[cmd].func = (GPUCommon::CmdFunc)commonCommandTable[i].func; if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) { // Can't have FLAG_EXECUTE commands without a function pointer to execute. Crash(); @@ -568,3 +578,453 @@ std::string GPUCommonHW::DebugGetShaderString(std::string id, DebugShaderType ty return shaderManager_->DebugGetShaderString(id, type, stringType); } } + +void GPUCommonHW::CheckDepthUsage(VirtualFramebuffer *vfb) { + if (!gstate_c.usingDepth) { + bool isReadingDepth = false; + bool isClearingDepth = false; + bool isWritingDepth = false; + if (gstate.isModeClear()) { + isClearingDepth = gstate.isClearModeDepthMask(); + isWritingDepth = isClearingDepth; + } else if (gstate.isDepthTestEnabled()) { + isWritingDepth = gstate.isDepthWriteEnabled(); + isReadingDepth = gstate.getDepthTestFunction() > GE_COMP_ALWAYS; + } + + if (isWritingDepth || isReadingDepth) { + gstate_c.usingDepth = true; + gstate_c.clearingDepth = isClearingDepth; + vfb->last_frame_depth_render = gpuStats.numFlips; + if (isWritingDepth) { + vfb->last_frame_depth_updated = gpuStats.numFlips; + } + framebufferManager_->SetDepthFrameBuffer(isClearingDepth); + } + } +} + +void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { + // This drives all drawing. All other state we just buffer up, then we apply it only + // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. + + PROFILE_THIS_SCOPE("execprim"); + + u32 data = op & 0xFFFFFF; + u32 count = data & 0xFFFF; + if (count == 0) + return; + FlushImm(); + + // Upper bits are ignored. 
+ GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7); + SetDrawType(DRAW_PRIM, prim); + + // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though. + if (gstate.isAntiAliasEnabled()) { + // Heuristic derived from discussions in #6483 and #12588. + // Discard AA lines in Persona 3 Portable, DOA Paradise and Summon Night 5, while still keeping AA lines in Echochrome. + if ((prim == GE_PRIM_LINE_STRIP || prim == GE_PRIM_LINES) && gstate.getTextureFunction() == GE_TEXFUNC_REPLACE) + return; + } + + // Update cached framebuffer format. + // We store it in the cache so it can be modified for blue-to-alpha, next. + gstate_c.framebufFormat = gstate.FrameBufFormat(); + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + return; + } + + // See the documentation for gstate_c.blueToAlpha. + bool blueToAlpha = false; + if (PSP_CoreParameter().compat.flags().BlueToAlpha) { + if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && !gstate.isLogicOpEnabled()) { + blueToAlpha = true; + gstate_c.framebufFormat = GEBufferFormat::GE_FORMAT_4444; + } + if (blueToAlpha != gstate_c.blueToAlpha) { + gstate_c.blueToAlpha = blueToAlpha; + gstate_c.Dirty(DIRTY_FRAMEBUF | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE); + } + } + + if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin) { + switch (gstate.vertType & 0xFFFFFF) { + case 0x00800102: // through, u16 uv, u16 pos (used for the framebuffer effect in-game) + case 0x0080011c: // through, 8888 color, s16 pos (used for clearing in the margin of the title screen) + case 0x00000183: // float uv, float pos (used for drawing in the margin of the title screen) + // Need to re-check the framebuffer every one of these draws, to update the split if needed. + gstate_c.Dirty(DIRTY_FRAMEBUF); + } + } + + // This also makes skipping drawing very effective. 
+ VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (blueToAlpha) { + vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA; + } + + // Must check this after SetRenderFrameBuffer so we know SKIPDRAW_NON_DISPLAYED_FB. + if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { + // Rough estimate, not sure what's correct. + cyclesExecuted += EstimatePerVertexCost() * count; + if (gstate.isModeClear()) { + gpuStats.numClears++; + } + return; + } + + CheckDepthUsage(vfb); + + const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); + const void *inds = nullptr; + u32 vertexType = gstate.vertType; + if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + u32 indexAddr = gstate_c.indexAddr; + if (!Memory::IsValidAddress(indexAddr)) { + ERROR_LOG(G3D, "Bad index address %08x!", indexAddr); + return; + } + inds = Memory::GetPointerUnchecked(indexAddr); + } + + if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) { + vertexCost_ = EstimatePerVertexCost(); + } + + int bytesRead = 0; + UpdateUVScaleOffset(); + + // cull mode + int cullMode = gstate.getCullMode(); + + uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning); + drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead); + // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). + // Some games rely on this, they don't bother reloading VADDR and IADDR. + // The VADDR/IADDR registers are NOT updated. + AdvanceVerts(vertexType, count, bytesRead); + int totalVertCount = count; + + // PRIMs are often followed by more PRIMs. Save some work and submit them immediately. + const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); + const u32_le *stall = currentList->stall ? 
(const u32_le *)Memory::GetPointerUnchecked(currentList->stall) : 0; + int cmdCount = 0; + + // Optimized submission of sequences of PRIM. Allows us to avoid going through all the mess + // above for each one. This can be expanded to support additional games that intersperse + // PRIM commands with other commands. A special case is Earth Defence Force 2 that changes culling mode + // between each prim, we just change the triangle winding right here to still be able to join draw calls. + + uint32_t vtypeCheckMask = ~GE_VTYPE_WEIGHTCOUNT_MASK; + if (!g_Config.bSoftwareSkinning) + vtypeCheckMask = 0xFFFFFFFF; + + if (debugRecording_) + goto bail; + + while (src != stall) { + uint32_t data = *src; + switch (data >> 24) { + case GE_CMD_PRIM: + { + u32 count = data & 0xFFFF; + if (count == 0) { + // Ignore. + break; + } + + GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((data >> 16) & 7); + SetDrawType(DRAW_PRIM, newPrim); + // TODO: more efficient updating of verts/inds + verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); + inds = nullptr; + if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); + } + + drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead); + AdvanceVerts(vertexType, count, bytesRead); + totalVertCount += count; + break; + } + case GE_CMD_VERTEXTYPE: + { + uint32_t diff = data ^ vertexType; + // don't mask upper bits, vertexType is unmasked + if (diff & vtypeCheckMask) { + goto bail; + } else { + vertexType = data; + vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning); + } + break; + } + case GE_CMD_VADDR: + gstate.cmdmem[GE_CMD_VADDR] = data; + gstate_c.vertexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF); + break; + case GE_CMD_IADDR: + gstate.cmdmem[GE_CMD_IADDR] = data; + gstate_c.indexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF); + break; + case GE_CMD_OFFSETADDR: + 
gstate.cmdmem[GE_CMD_OFFSETADDR] = data; + gstate_c.offsetAddr = data << 8; + break; + case GE_CMD_BASE: + gstate.cmdmem[GE_CMD_BASE] = data; + break; + case GE_CMD_CULLFACEENABLE: + // Earth Defence Force 2 + if (gstate.cmdmem[GE_CMD_CULLFACEENABLE] != data) { + goto bail; + } + break; + case GE_CMD_CULL: + // flip face by indices for triangles + cullMode = data & 1; + break; + case GE_CMD_TEXFLUSH: + case GE_CMD_NOP: + case GE_CMD_NOP_FF: + gstate.cmdmem[data >> 24] = data; + break; + case GE_CMD_BONEMATRIXNUMBER: + gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = data; + break; + case GE_CMD_TEXSCALEU: + gstate.cmdmem[GE_CMD_TEXSCALEU] = data; + gstate_c.uv.uScale = getFloat24(data); + break; + case GE_CMD_TEXSCALEV: + gstate.cmdmem[GE_CMD_TEXSCALEV] = data; + gstate_c.uv.vScale = getFloat24(data); + break; + case GE_CMD_TEXOFFSETU: + gstate.cmdmem[GE_CMD_TEXOFFSETU] = data; + gstate_c.uv.uOff = getFloat24(data); + break; + case GE_CMD_TEXOFFSETV: + gstate.cmdmem[GE_CMD_TEXOFFSETV] = data; + gstate_c.uv.vOff = getFloat24(data); + break; + case GE_CMD_TEXLEVEL: + // Same Gran Turismo hack from Execute_TexLevel + if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) { + goto bail; + } + gstate.cmdmem[GE_CMD_TEXLEVEL] = data; + break; + case GE_CMD_CALL: + { + // A bone matrix probably. If not we bail. 
+ const u32 target = gstate_c.getRelativeAddress(data & 0x00FFFFFC); + if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA && + (Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA && + (Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET && + (target > currentList->stall || target + 12 * 4 < currentList->stall) && + (gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) { + FastLoadBoneMatrix(target); + } else { + goto bail; + } + break; + } + + case GE_CMD_TEXBUFWIDTH0: + case GE_CMD_TEXADDR0: + if (data != gstate.cmdmem[data >> 24]) + goto bail; + break; + + default: + // All other commands might need a flush or something, stop this inner loop. + goto bail; + } + cmdCount++; + src++; + } + +bail: + gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType; + // Skip over the commands we just read out manually. + if (cmdCount > 0) { + UpdatePC(currentList->pc, currentList->pc + cmdCount * 4); + currentList->pc += cmdCount * 4; + // flush back cull mode + if (cullMode != gstate.getCullMode()) { + // We rewrote everything to the old cull mode, so flush first. + drawEngineCommon_->DispatchFlush(); + + // Now update things for next time. + gstate.cmdmem[GE_CMD_CULL] ^= 1; + gstate_c.Dirty(DIRTY_RASTER_STATE); + } + } + + gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount; + cyclesExecuted += vertexCost_ * totalVertCount; +} + +void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) { + // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. + gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + + gstate_c.framebufFormat = gstate.FrameBufFormat(); + + // This also make skipping drawing very effective. + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { + // TODO: Should this eat some cycles? Probably yes. Not sure if important. 
+ return; + } + + CheckDepthUsage(vfb); + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + return; + } + + const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr); + const void *indices = NULL; + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) { + ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr); + return; + } + indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); + } + + if (vertTypeIsSkinningEnabled(gstate.vertType)) { + DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); + } + + // Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already. + if (flushOnParams_) + drawEngineCommon_->DispatchFlush(); + + Spline::BezierSurface surface; + surface.tess_u = gstate.getPatchDivisionU(); + surface.tess_v = gstate.getPatchDivisionV(); + surface.num_points_u = op & 0xFF; + surface.num_points_v = (op >> 8) & 0xFF; + surface.num_patches_u = (surface.num_points_u - 1) / 3; + surface.num_patches_v = (surface.num_points_v - 1) / 3; + surface.primType = gstate.getPatchPrimitiveType(); + surface.patchFacing = gstate.patchfacing & 1; + + SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType)); + + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); + if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { + gstate_c.submitType = SubmitType::HW_BEZIER; + if (gstate_c.spline_num_points_u != surface.num_points_u) { + gstate_c.Dirty(DIRTY_BEZIERSPLINE); + gstate_c.spline_num_points_u = surface.num_points_u; + } + } else { + gstate_c.submitType = SubmitType::BEZIER; + } + + int bytesRead = 0; + UpdateUVScaleOffset(); + 
drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier"); + + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); + gstate_c.submitType = SubmitType::DRAW; + + // After drawing, we advance pointers - see SubmitPrim which does the same. + int count = surface.num_points_u * surface.num_points_v; + AdvanceVerts(gstate.vertType, count, bytesRead); +} + +void GPUCommonHW::Execute_Spline(u32 op, u32 diff) { + // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. + gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + + gstate_c.framebufFormat = gstate.FrameBufFormat(); + + // This also makes skipping drawing very effective. + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { + // TODO: Should this eat some cycles? Probably yes. Not sure if important. + return; + } + + CheckDepthUsage(vfb); + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + return; + } + + const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr); + const void *indices = NULL; + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) { + ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr); + return; + } + indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); + } + + if (vertTypeIsSkinningEnabled(gstate.vertType)) { + DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); + } + + // Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already. 
+ if (flushOnParams_) + drawEngineCommon_->DispatchFlush(); + + Spline::SplineSurface surface; + surface.tess_u = gstate.getPatchDivisionU(); + surface.tess_v = gstate.getPatchDivisionV(); + surface.type_u = (op >> 16) & 0x3; + surface.type_v = (op >> 18) & 0x3; + surface.num_points_u = op & 0xFF; + surface.num_points_v = (op >> 8) & 0xFF; + surface.num_patches_u = surface.num_points_u - 3; + surface.num_patches_v = surface.num_points_v - 3; + surface.primType = gstate.getPatchPrimitiveType(); + surface.patchFacing = gstate.patchfacing & 1; + + SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType)); + + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); + if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { + gstate_c.submitType = SubmitType::HW_SPLINE; + if (gstate_c.spline_num_points_u != surface.num_points_u) { + gstate_c.Dirty(DIRTY_BEZIERSPLINE); + gstate_c.spline_num_points_u = surface.num_points_u; + } + } else { + gstate_c.submitType = SubmitType::SPLINE; + } + + int bytesRead = 0; + UpdateUVScaleOffset(); + drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline"); + + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); + gstate_c.submitType = SubmitType::DRAW; + + // After drawing, we advance pointers - see SubmitPrim which does the same. + int count = surface.num_points_u * surface.num_points_v; + AdvanceVerts(gstate.vertType, count, bytesRead); +} + +void GPUCommonHW::Execute_BlockTransferStart(u32 op, u32 diff) { + Flush(); + + PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading. + + gstate_c.framebufFormat = gstate.FrameBufFormat(); + + // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. + // Can we skip this on SkipDraw? 
+ DoBlockTransfer(gstate_c.skipDrawReason); +} diff --git a/GPU/GPUCommonHW.h b/GPU/GPUCommonHW.h index f059d93fc0..11cadb0d0a 100644 --- a/GPU/GPUCommonHW.h +++ b/GPU/GPUCommonHW.h @@ -19,6 +19,13 @@ public: std::vector DebugGetShaderIDs(DebugShaderType shader) override; std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; + void Execute_Prim(u32 op, u32 diff); + void Execute_Bezier(u32 op, u32 diff); + void Execute_Spline(u32 op, u32 diff); + void Execute_BlockTransferStart(u32 op, u32 diff); + + typedef void (GPUCommonHW::*CmdFunc)(u32 op, u32 diff); + protected: void UpdateCmdInfo() override; @@ -32,5 +39,7 @@ protected: void CheckRenderResized() override; int msaaLevel_ = 0; -}; +private: + void CheckDepthUsage(VirtualFramebuffer *vfb); +};