Move the drawing commands to GPUCommonHW

This commit is contained in:
Henrik Rydgård 2023-02-25 17:17:09 +01:00
parent 05209a3968
commit d22a22569a
4 changed files with 478 additions and 470 deletions

View file

@ -42,6 +42,7 @@
#include "Core/HW/Display.h"
#include "Core/MemMapHelpers.h"
#include "Core/Util/PPGeDraw.h"
#include "GPU/GPUCommonHW.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/SplineCommon.h"
@ -50,7 +51,7 @@
#include "GPU/Debugger/Record.h"
// TODO: Make class member?
GPUCommon::CommandInfo GPUCommon::cmdInfo_[256];
GPUCommonHW::CommandInfo GPUCommon::cmdInfo_[256];
void GPUCommon::Flush() {
drawEngineCommon_->DispatchFlush();
@ -1380,444 +1381,6 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE | DIRTY_FOGCOEFENABLE);
}
// Marks the depth buffer as in use when the current draw state reads, writes or clears depth.
// Does nothing if gstate_c.usingDepth is already set. When depth is used, the frame counters on
// vfb are stamped with gpuStats.numFlips and the framebuffer manager is told to set up depth.
void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) {
	// Already flagged as using depth; nothing more to record.
	if (gstate_c.usingDepth)
		return;

	bool depthCleared = false;
	bool depthWritten = false;
	bool depthRead = false;
	if (gstate.isModeClear()) {
		// In clear mode, depth counts as written exactly when the clear's depth mask is set.
		depthCleared = gstate.isClearModeDepthMask();
		depthWritten = depthCleared;
	} else if (gstate.isDepthTestEnabled()) {
		depthWritten = gstate.isDepthWriteEnabled();
		// Test functions up to and including GE_COMP_ALWAYS are not counted as a depth read.
		depthRead = gstate.getDepthTestFunction() > GE_COMP_ALWAYS;
	}

	if (!depthWritten && !depthRead)
		return;

	gstate_c.usingDepth = true;
	gstate_c.clearingDepth = depthCleared;
	vfb->last_frame_depth_render = gpuStats.numFlips;
	if (depthWritten)
		vfb->last_frame_depth_updated = gpuStats.numFlips;
	framebufferManager_->SetDepthFrameBuffer(depthCleared);
}
// Handles GE_CMD_PRIM, the command that submits geometry to the draw engine.
//
// op:   raw command word. Bits 0-15 hold the vertex count, bits 16-18 the primitive type;
//       the bits above that are ignored.
// diff: unused by this handler.
//
// After submitting the first primitive, the function peeks ahead in the display list and consumes
// following PRIM commands (plus a small set of cheap state changes) inline, stopping at the stall
// address or at the first command it cannot handle here. The list PC is then advanced past
// everything that was consumed.
void GPUCommon::Execute_Prim(u32 op, u32 diff) {
	// This drives all drawing. All other state we just buffer up, then we apply it only
	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.

	PROFILE_THIS_SCOPE("execprim");

	u32 data = op & 0xFFFFFF;
	u32 count = data & 0xFFFF;
	if (count == 0)
		return;
	FlushImm();

	// Upper bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Heuristic derived from discussions in #6483 and #12588.
		// Discard AA lines in Persona 3 Portable, DOA Paradise and Summon Night 5, while still keeping AA lines in Echochrome.
		if ((prim == GE_PRIM_LINE_STRIP || prim == GE_PRIM_LINES) && gstate.getTextureFunction() == GE_TEXFUNC_REPLACE)
			return;
	}

	// Update cached framebuffer format.
	// We store it in the cache so it can be modified for blue-to-alpha, next.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	// See the documentation for gstate_c.blueToAlpha.
	bool blueToAlpha = false;
	if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
		if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && !gstate.isLogicOpEnabled()) {
			blueToAlpha = true;
			gstate_c.framebufFormat = GEBufferFormat::GE_FORMAT_4444;
		}
		if (blueToAlpha != gstate_c.blueToAlpha) {
			gstate_c.blueToAlpha = blueToAlpha;
			gstate_c.Dirty(DIRTY_FRAMEBUF | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE);
		}
	}

	if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin) {
		switch (gstate.vertType & 0xFFFFFF) {
		case 0x00800102:  // through, u16 uv, u16 pos (used for the framebuffer effect in-game)
		case 0x0080011c:  // through, 8888 color, s16 pos (used for clearing in the margin of the title screen)
		case 0x00000183:  // float uv, float pos (used for drawing in the margin of the title screen)
			// Need to re-check the framebuffer every one of these draws, to update the split if needed.
			gstate_c.Dirty(DIRTY_FRAMEBUF);
		}
	}

	// This also makes skipping drawing very effective.
	VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	if (blueToAlpha) {
		vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA;
	}

	// Must check this after SetRenderFrameBuffer so we know SKIPDRAW_NON_DISPLAYED_FB.
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		if (gstate.isModeClear()) {
			gpuStats.numClears++;
		}
		return;
	}

	CheckDepthUsage(vfb);

	const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *inds = nullptr;
	u32 vertexType = gstate.vertType;
	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
	}

	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}

	int bytesRead = 0;
	UpdateUVScaleOffset();

	// cull mode
	int cullMode = gstate.getCullMode();

	uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
	drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);
	int totalVertCount = count;

	// PRIMs are often followed by more PRIMs. Save some work and submit them immediately.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	// A zero stall address yields a null pointer, which src never equals, so the loop then only ends via bail.
	const u32_le *stall = currentList->stall ? (const u32_le *)Memory::GetPointerUnchecked(currentList->stall) : nullptr;
	int cmdCount = 0;

	// Optimized submission of sequences of PRIM. Allows us to avoid going through all the mess
	// above for each one. This can be expanded to support additional games that intersperse
	// PRIM commands with other commands. A special case is Earth Defence Force 2 that changes culling mode
	// between each prim, we just change the triangle winding right here to still be able to join draw calls.

	uint32_t vtypeCheckMask = ~GE_VTYPE_WEIGHTCOUNT_MASK;
	if (!g_Config.bSoftwareSkinning)
		vtypeCheckMask = 0xFFFFFFFF;

	if (debugRecording_)
		goto bail;

	while (src != stall) {
		const uint32_t cmdWord = *src;
		switch (cmdWord >> 24) {
		case GE_CMD_PRIM:
		{
			const u32 primCount = cmdWord & 0xFFFF;
			if (primCount == 0) {
				// Ignore.
				break;
			}

			// VADDR/IADDR/OFFSETADDR/BASE handled below may have moved the pointers since the checks
			// at the top of this function. If either is now invalid, leave this PRIM unconsumed so it
			// is re-run through the regular path, which validates and logs instead of reading bad memory.
			const bool indexed = (vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
			if (!Memory::IsValidAddress(gstate_c.vertexAddr) || (indexed && !Memory::IsValidAddress(gstate_c.indexAddr))) {
				goto bail;
			}

			GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((cmdWord >> 16) & 7);
			SetDrawType(DRAW_PRIM, newPrim);
			// TODO: more efficient updating of verts/inds
			verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
			inds = nullptr;
			if (indexed) {
				inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
			}

			drawEngineCommon_->SubmitPrim(verts, inds, newPrim, primCount, vertTypeID, cullMode, &bytesRead);
			AdvanceVerts(vertexType, primCount, bytesRead);
			totalVertCount += primCount;
			break;
		}
		case GE_CMD_VERTEXTYPE:
		{
			const uint32_t vtypeDiff = cmdWord ^ vertexType;
			// don't mask upper bits, vertexType is unmasked
			if (vtypeDiff & vtypeCheckMask) {
				goto bail;
			} else {
				vertexType = cmdWord;
				vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
			}
			break;
		}
		case GE_CMD_VADDR:
			gstate.cmdmem[GE_CMD_VADDR] = cmdWord;
			gstate_c.vertexAddr = gstate_c.getRelativeAddress(cmdWord & 0x00FFFFFF);
			break;
		case GE_CMD_IADDR:
			gstate.cmdmem[GE_CMD_IADDR] = cmdWord;
			gstate_c.indexAddr = gstate_c.getRelativeAddress(cmdWord & 0x00FFFFFF);
			break;
		case GE_CMD_OFFSETADDR:
			gstate.cmdmem[GE_CMD_OFFSETADDR] = cmdWord;
			gstate_c.offsetAddr = cmdWord << 8;
			break;
		case GE_CMD_BASE:
			gstate.cmdmem[GE_CMD_BASE] = cmdWord;
			break;
		case GE_CMD_CULLFACEENABLE:
			// Earth Defence Force 2
			if (gstate.cmdmem[GE_CMD_CULLFACEENABLE] != cmdWord) {
				goto bail;
			}
			break;
		case GE_CMD_CULL:
			// flip face by indices for triangles
			cullMode = cmdWord & 1;
			break;
		case GE_CMD_TEXFLUSH:
		case GE_CMD_NOP:
		case GE_CMD_NOP_FF:
			gstate.cmdmem[cmdWord >> 24] = cmdWord;
			break;
		case GE_CMD_BONEMATRIXNUMBER:
			gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = cmdWord;
			break;
		case GE_CMD_TEXSCALEU:
			gstate.cmdmem[GE_CMD_TEXSCALEU] = cmdWord;
			gstate_c.uv.uScale = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXSCALEV:
			gstate.cmdmem[GE_CMD_TEXSCALEV] = cmdWord;
			gstate_c.uv.vScale = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXOFFSETU:
			gstate.cmdmem[GE_CMD_TEXOFFSETU] = cmdWord;
			gstate_c.uv.uOff = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXOFFSETV:
			gstate.cmdmem[GE_CMD_TEXOFFSETV] = cmdWord;
			gstate_c.uv.vOff = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXLEVEL:
			// Same Gran Turismo hack from Execute_TexLevel
			if ((cmdWord & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & cmdWord) != 0) {
				goto bail;
			}
			gstate.cmdmem[GE_CMD_TEXLEVEL] = cmdWord;
			break;
		case GE_CMD_CALL:
		{
			// A bone matrix probably. If not we bail.
			const u32 target = gstate_c.getRelativeAddress(cmdWord & 0x00FFFFFC);
			// Validate both ends of the 13-word probe before the unchecked reads below.
			if (Memory::IsValidAddress(target) &&
				Memory::IsValidAddress(target + 12 * 4) &&
				(Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
				(target > currentList->stall || target + 12 * 4 < currentList->stall) &&
				(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
				FastLoadBoneMatrix(target);
			} else {
				goto bail;
			}
			break;
		}

		case GE_CMD_TEXBUFWIDTH0:
		case GE_CMD_TEXADDR0:
			if (cmdWord != gstate.cmdmem[cmdWord >> 24])
				goto bail;
			break;

		default:
			// All other commands might need a flush or something, stop this inner loop.
			goto bail;
		}
		cmdCount++;
		src++;
	}

bail:
	// Write back the vertex type that was tracked locally while commands were consumed inline.
	gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType;
	// Skip over the commands we just read out manually.
	if (cmdCount > 0) {
		UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
		currentList->pc += cmdCount * 4;
		// flush back cull mode
		if (cullMode != gstate.getCullMode()) {
			// We rewrote everything to the old cull mode, so flush first.
			drawEngineCommon_->DispatchFlush();

			// Now update things for next time.
			gstate.cmdmem[GE_CMD_CULL] ^= 1;
			gstate_c.Dirty(DIRTY_RASTER_STATE);
		}
	}

	gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount;
	cyclesExecuted += vertexCost_ * totalVertCount;
}
// Handles GE_CMD_BEZIER: submits a bezier patch read from the current vertex/index pointers.
// The low byte of op is the control point count along U, the next byte the count along V.
void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
	// Regular draws prescale and no longer dirty this on normal changes, but curves still need it.
	gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// Binding the render target first makes skipped draws exit before any real work is done.
	VirtualFramebuffer *target = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(target);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	const void *ctrlPoints = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// The index pointer stays null for non-indexed vertex types.
	const void *indexData = nullptr;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Any pending flush has to happen before gstate_c.submitType is switched away from DRAW below.
	if (flushOnParams_)
		drawEngineCommon_->DispatchFlush();

	Spline::BezierSurface patch;
	patch.tess_u = gstate.getPatchDivisionU();
	patch.tess_v = gstate.getPatchDivisionV();
	patch.num_points_u = op & 0xFF;
	patch.num_points_v = (op >> 8) & 0xFF;
	patch.num_patches_u = (patch.num_points_u - 1) / 3;
	patch.num_patches_v = (patch.num_points_v - 1) / 3;
	patch.primType = gstate.getPatchPrimitiveType();
	patch.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_BEZIER, PatchPrimToPrim(patch.primType));

	// The same state is dirtied before and after the submit, around the submitType switch.
	const auto curveDirty = DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE;
	gstate_c.Dirty(curveDirty);

	const bool hwTess = drawEngineCommon_->CanUseHardwareTessellation(patch.primType);
	gstate_c.submitType = hwTess ? SubmitType::HW_BEZIER : SubmitType::BEZIER;
	if (hwTess && gstate_c.spline_num_points_u != patch.num_points_u) {
		gstate_c.Dirty(DIRTY_BEZIERSPLINE);
		gstate_c.spline_num_points_u = patch.num_points_u;
	}

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(ctrlPoints, indexData, patch, gstate.vertType, &bytesRead, "bezier");

	gstate_c.Dirty(curveDirty);
	gstate_c.submitType = SubmitType::DRAW;

	// Advance the vertex/index pointers past the control points, as is done after SubmitPrim.
	const int pointCount = patch.num_points_u * patch.num_points_v;
	AdvanceVerts(gstate.vertType, pointCount, bytesRead);
}
// Handles GE_CMD_SPLINE: submits a spline patch read from the current vertex/index pointers.
// op packs the control point counts (bits 0-7 for U, bits 8-15 for V) and the two-bit
// spline types (bits 16-17 for U, bits 18-19 for V).
void GPUCommon::Execute_Spline(u32 op, u32 diff) {
	// Regular draws prescale and no longer dirty this on normal changes, but curves still need it.
	gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// Binding the render target first makes skipped draws exit before any real work is done.
	VirtualFramebuffer *target = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(target);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	const void *ctrlPoints = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// The index pointer stays null for non-indexed vertex types.
	const void *indexData = nullptr;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Any pending flush has to happen before gstate_c.submitType is switched away from DRAW below.
	if (flushOnParams_)
		drawEngineCommon_->DispatchFlush();

	Spline::SplineSurface patch;
	patch.tess_u = gstate.getPatchDivisionU();
	patch.tess_v = gstate.getPatchDivisionV();
	patch.type_u = (op >> 16) & 0x3;
	patch.type_v = (op >> 18) & 0x3;
	patch.num_points_u = op & 0xFF;
	patch.num_points_v = (op >> 8) & 0xFF;
	patch.num_patches_u = patch.num_points_u - 3;
	patch.num_patches_v = patch.num_points_v - 3;
	patch.primType = gstate.getPatchPrimitiveType();
	patch.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_SPLINE, PatchPrimToPrim(patch.primType));

	// The same state is dirtied before and after the submit, around the submitType switch.
	const auto curveDirty = DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE;
	gstate_c.Dirty(curveDirty);

	const bool hwTess = drawEngineCommon_->CanUseHardwareTessellation(patch.primType);
	gstate_c.submitType = hwTess ? SubmitType::HW_SPLINE : SubmitType::SPLINE;
	if (hwTess && gstate_c.spline_num_points_u != patch.num_points_u) {
		gstate_c.Dirty(DIRTY_BEZIERSPLINE);
		gstate_c.spline_num_points_u = patch.num_points_u;
	}

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(ctrlPoints, indexData, patch, gstate.vertType, &bytesRead, "spline");

	gstate_c.Dirty(curveDirty);
	gstate_c.submitType = SubmitType::DRAW;

	// Advance the vertex/index pointers past the control points, as is done after SubmitPrim.
	const int pointCount = patch.num_points_u * patch.num_points_v;
	AdvanceVerts(gstate.vertType, pointCount, bytesRead);
}
void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
// Just resetting, nothing to check bounds for.
const u32 count = op & 0xFFFF;
@ -1872,18 +1435,6 @@ void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
}
}
// Handles GE_CMD_TRANSFERSTART: flushes the draw engine, refreshes the cached framebuffer
// format and then performs the block transfer. Neither op nor diff is read here.
void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) {
	// Submit anything still queued in the draw engine before the transfer runs.
	Flush();

	PROFILE_THIS_SCOPE("block");  // don't include the flush in the profile, would be misleading.

	// Refresh the cached framebuffer format from the current register state.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// This is a block transfer between RAM and VRAM, or vice versa; DoBlockTransfer takes the appropriate action.
	// Can we skip this on SkipDraw?
	DoBlockTransfer(gstate_c.skipDrawReason);
}
void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
if (!currentList) {
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (op & 0xF);

View file

@ -155,11 +155,7 @@ public:
void Execute_VertexType(u32 op, u32 diff);
void Execute_VertexTypeSkinning(u32 op, u32 diff);
void Execute_Prim(u32 op, u32 diff);
void Execute_Bezier(u32 op, u32 diff);
void Execute_Spline(u32 op, u32 diff);
void Execute_BoundingBox(u32 op, u32 diff);
void Execute_BlockTransferStart(u32 op, u32 diff);
void Execute_LoadClut(u32 op, u32 diff);
@ -415,7 +411,6 @@ protected:
std::string reportingFullInfo_;
private:
void CheckDepthUsage(VirtualFramebuffer *vfb);
void DoExecuteCall(u32 target);
void PopDLQueue();
void CheckDrawSync();
@ -427,10 +422,3 @@ private:
int lastVsync_ = -1;
};
// One row of a command table: describes how a single GE command is dispatched.
// Kept as a plain aggregate so rows can be brace-initialized with trailing fields omitted.
struct CommonCommandTableEntry {
	uint8_t cmd;    // GE command opcode (a GE_CMD_* value).
	uint8_t flags;  // FLAG_* dispatch bits, e.g. FLAG_EXECUTE or FLAG_FLUSHBEFOREONCHANGE.
	uint64_t dirty; // Extra bits merged into the command's flags, shifted left by 8 (presumably DIRTY_* state bits - confirm).
	GPUCommon::CmdFunc func;  // Handler member function; required when FLAG_EXECUTE or FLAG_EXECUTEONCHANGE is set.
};

View file

@ -1,3 +1,5 @@
#include "Common/Profiler/Profiler.h"
#include "Common/GPU/thin3d.h"
#include "Common/Serialize/Serializer.h"
#include "Common/System/System.h"
@ -6,10 +8,18 @@
#include "Core/Config.h"
#include "GPU/GPUCommonHW.h"
#include "GPU/Common/SplineCommon.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
// One row of commonCommandTable: describes how a single GE command is dispatched.
// Kept as a plain aggregate so rows can be brace-initialized with trailing fields omitted.
struct CommonCommandTableEntry {
	uint8_t cmd;    // GE command opcode (a GE_CMD_* value).
	uint8_t flags;  // FLAG_* dispatch bits, e.g. FLAG_EXECUTE or FLAG_FLUSHBEFOREONCHANGE.
	uint64_t dirty; // Extra bits merged into the command's flags, shifted left by 8 (presumably DIRTY_* state bits - confirm).
	GPUCommonHW::CmdFunc func;  // Handler member function; required when FLAG_EXECUTE or FLAG_EXECUTEONCHANGE is set.
};
const CommonCommandTableEntry commonCommandTable[] = {
// From Common. No flushing but definitely need execute.
{ GE_CMD_OFFSETADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_OffsetAddr },
@ -21,11 +31,11 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_VADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Vaddr },
{ GE_CMD_IADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Iaddr },
{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // Shouldn't need to FLUSHBEFORE.
{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_BoundingBox }, // Shouldn't need to FLUSHBEFORE.
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommon::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
@ -274,7 +284,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TRANSFERSRCPOS, 0 },
{ GE_CMD_TRANSFERDSTPOS, 0 },
{ GE_CMD_TRANSFERSIZE, 0 },
{ GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommon::Execute_BlockTransferStart },
{ GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommonHW::Execute_BlockTransferStart },
// We don't use the dither table.
{ GE_CMD_DITH0 },
@ -346,7 +356,7 @@ GPUCommonHW::GPUCommonHW(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPU
dupeCheck.insert(cmd);
}
cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
cmdInfo_[cmd].func = commonCommandTable[i].func;
cmdInfo_[cmd].func = (GPUCommon::CmdFunc)commonCommandTable[i].func;
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
// Can't have FLAG_EXECUTE commands without a function pointer to execute.
Crash();
@ -568,3 +578,453 @@ std::string GPUCommonHW::DebugGetShaderString(std::string id, DebugShaderType ty
return shaderManager_->DebugGetShaderString(id, type, stringType);
}
}
// Marks the depth buffer as in use when the current draw state reads, writes or clears depth.
// Does nothing if gstate_c.usingDepth is already set. When depth is used, the frame counters on
// vfb are stamped with gpuStats.numFlips and the framebuffer manager is told to set up depth.
void GPUCommonHW::CheckDepthUsage(VirtualFramebuffer *vfb) {
	// Already flagged as using depth; nothing more to record.
	if (gstate_c.usingDepth)
		return;

	bool depthCleared = false;
	bool depthWritten = false;
	bool depthRead = false;
	if (gstate.isModeClear()) {
		// In clear mode, depth counts as written exactly when the clear's depth mask is set.
		depthCleared = gstate.isClearModeDepthMask();
		depthWritten = depthCleared;
	} else if (gstate.isDepthTestEnabled()) {
		depthWritten = gstate.isDepthWriteEnabled();
		// Test functions up to and including GE_COMP_ALWAYS are not counted as a depth read.
		depthRead = gstate.getDepthTestFunction() > GE_COMP_ALWAYS;
	}

	if (!depthWritten && !depthRead)
		return;

	gstate_c.usingDepth = true;
	gstate_c.clearingDepth = depthCleared;
	vfb->last_frame_depth_render = gpuStats.numFlips;
	if (depthWritten)
		vfb->last_frame_depth_updated = gpuStats.numFlips;
	framebufferManager_->SetDepthFrameBuffer(depthCleared);
}
// Handles GE_CMD_PRIM, the command that submits geometry to the draw engine.
//
// op:   raw command word. Bits 0-15 hold the vertex count, bits 16-18 the primitive type;
//       the bits above that are ignored.
// diff: unused by this handler.
//
// After submitting the first primitive, the function peeks ahead in the display list and consumes
// following PRIM commands (plus a small set of cheap state changes) inline, stopping at the stall
// address or at the first command it cannot handle here. The list PC is then advanced past
// everything that was consumed.
void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
	// This drives all drawing. All other state we just buffer up, then we apply it only
	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.

	PROFILE_THIS_SCOPE("execprim");

	u32 data = op & 0xFFFFFF;
	u32 count = data & 0xFFFF;
	if (count == 0)
		return;
	FlushImm();

	// Upper bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Heuristic derived from discussions in #6483 and #12588.
		// Discard AA lines in Persona 3 Portable, DOA Paradise and Summon Night 5, while still keeping AA lines in Echochrome.
		if ((prim == GE_PRIM_LINE_STRIP || prim == GE_PRIM_LINES) && gstate.getTextureFunction() == GE_TEXFUNC_REPLACE)
			return;
	}

	// Update cached framebuffer format.
	// We store it in the cache so it can be modified for blue-to-alpha, next.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	// See the documentation for gstate_c.blueToAlpha.
	bool blueToAlpha = false;
	if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
		if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && !gstate.isLogicOpEnabled()) {
			blueToAlpha = true;
			gstate_c.framebufFormat = GEBufferFormat::GE_FORMAT_4444;
		}
		if (blueToAlpha != gstate_c.blueToAlpha) {
			gstate_c.blueToAlpha = blueToAlpha;
			gstate_c.Dirty(DIRTY_FRAMEBUF | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE);
		}
	}

	if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin) {
		switch (gstate.vertType & 0xFFFFFF) {
		case 0x00800102:  // through, u16 uv, u16 pos (used for the framebuffer effect in-game)
		case 0x0080011c:  // through, 8888 color, s16 pos (used for clearing in the margin of the title screen)
		case 0x00000183:  // float uv, float pos (used for drawing in the margin of the title screen)
			// Need to re-check the framebuffer every one of these draws, to update the split if needed.
			gstate_c.Dirty(DIRTY_FRAMEBUF);
		}
	}

	// This also makes skipping drawing very effective.
	VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	if (blueToAlpha) {
		vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA;
	}

	// Must check this after SetRenderFrameBuffer so we know SKIPDRAW_NON_DISPLAYED_FB.
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		if (gstate.isModeClear()) {
			gpuStats.numClears++;
		}
		return;
	}

	CheckDepthUsage(vfb);

	const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *inds = nullptr;
	u32 vertexType = gstate.vertType;
	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
	}

	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}

	int bytesRead = 0;
	UpdateUVScaleOffset();

	// cull mode
	int cullMode = gstate.getCullMode();

	uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
	drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);
	int totalVertCount = count;

	// PRIMs are often followed by more PRIMs. Save some work and submit them immediately.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	// A zero stall address yields a null pointer, which src never equals, so the loop then only ends via bail.
	const u32_le *stall = currentList->stall ? (const u32_le *)Memory::GetPointerUnchecked(currentList->stall) : nullptr;
	int cmdCount = 0;

	// Optimized submission of sequences of PRIM. Allows us to avoid going through all the mess
	// above for each one. This can be expanded to support additional games that intersperse
	// PRIM commands with other commands. A special case is Earth Defence Force 2 that changes culling mode
	// between each prim, we just change the triangle winding right here to still be able to join draw calls.

	uint32_t vtypeCheckMask = ~GE_VTYPE_WEIGHTCOUNT_MASK;
	if (!g_Config.bSoftwareSkinning)
		vtypeCheckMask = 0xFFFFFFFF;

	if (debugRecording_)
		goto bail;

	while (src != stall) {
		const uint32_t cmdWord = *src;
		switch (cmdWord >> 24) {
		case GE_CMD_PRIM:
		{
			const u32 primCount = cmdWord & 0xFFFF;
			if (primCount == 0) {
				// Ignore.
				break;
			}

			// VADDR/IADDR/OFFSETADDR/BASE handled below may have moved the pointers since the checks
			// at the top of this function. If either is now invalid, leave this PRIM unconsumed so it
			// is re-run through the regular path, which validates and logs instead of reading bad memory.
			const bool indexed = (vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
			if (!Memory::IsValidAddress(gstate_c.vertexAddr) || (indexed && !Memory::IsValidAddress(gstate_c.indexAddr))) {
				goto bail;
			}

			GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((cmdWord >> 16) & 7);
			SetDrawType(DRAW_PRIM, newPrim);
			// TODO: more efficient updating of verts/inds
			verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
			inds = nullptr;
			if (indexed) {
				inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
			}

			drawEngineCommon_->SubmitPrim(verts, inds, newPrim, primCount, vertTypeID, cullMode, &bytesRead);
			AdvanceVerts(vertexType, primCount, bytesRead);
			totalVertCount += primCount;
			break;
		}
		case GE_CMD_VERTEXTYPE:
		{
			const uint32_t vtypeDiff = cmdWord ^ vertexType;
			// don't mask upper bits, vertexType is unmasked
			if (vtypeDiff & vtypeCheckMask) {
				goto bail;
			} else {
				vertexType = cmdWord;
				vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
			}
			break;
		}
		case GE_CMD_VADDR:
			gstate.cmdmem[GE_CMD_VADDR] = cmdWord;
			gstate_c.vertexAddr = gstate_c.getRelativeAddress(cmdWord & 0x00FFFFFF);
			break;
		case GE_CMD_IADDR:
			gstate.cmdmem[GE_CMD_IADDR] = cmdWord;
			gstate_c.indexAddr = gstate_c.getRelativeAddress(cmdWord & 0x00FFFFFF);
			break;
		case GE_CMD_OFFSETADDR:
			gstate.cmdmem[GE_CMD_OFFSETADDR] = cmdWord;
			gstate_c.offsetAddr = cmdWord << 8;
			break;
		case GE_CMD_BASE:
			gstate.cmdmem[GE_CMD_BASE] = cmdWord;
			break;
		case GE_CMD_CULLFACEENABLE:
			// Earth Defence Force 2
			if (gstate.cmdmem[GE_CMD_CULLFACEENABLE] != cmdWord) {
				goto bail;
			}
			break;
		case GE_CMD_CULL:
			// flip face by indices for triangles
			cullMode = cmdWord & 1;
			break;
		case GE_CMD_TEXFLUSH:
		case GE_CMD_NOP:
		case GE_CMD_NOP_FF:
			gstate.cmdmem[cmdWord >> 24] = cmdWord;
			break;
		case GE_CMD_BONEMATRIXNUMBER:
			gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = cmdWord;
			break;
		case GE_CMD_TEXSCALEU:
			gstate.cmdmem[GE_CMD_TEXSCALEU] = cmdWord;
			gstate_c.uv.uScale = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXSCALEV:
			gstate.cmdmem[GE_CMD_TEXSCALEV] = cmdWord;
			gstate_c.uv.vScale = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXOFFSETU:
			gstate.cmdmem[GE_CMD_TEXOFFSETU] = cmdWord;
			gstate_c.uv.uOff = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXOFFSETV:
			gstate.cmdmem[GE_CMD_TEXOFFSETV] = cmdWord;
			gstate_c.uv.vOff = getFloat24(cmdWord);
			break;
		case GE_CMD_TEXLEVEL:
			// Same Gran Turismo hack from Execute_TexLevel
			if ((cmdWord & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & cmdWord) != 0) {
				goto bail;
			}
			gstate.cmdmem[GE_CMD_TEXLEVEL] = cmdWord;
			break;
		case GE_CMD_CALL:
		{
			// A bone matrix probably. If not we bail.
			const u32 target = gstate_c.getRelativeAddress(cmdWord & 0x00FFFFFC);
			// Validate both ends of the 13-word probe before the unchecked reads below.
			if (Memory::IsValidAddress(target) &&
				Memory::IsValidAddress(target + 12 * 4) &&
				(Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
				(target > currentList->stall || target + 12 * 4 < currentList->stall) &&
				(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
				FastLoadBoneMatrix(target);
			} else {
				goto bail;
			}
			break;
		}

		case GE_CMD_TEXBUFWIDTH0:
		case GE_CMD_TEXADDR0:
			if (cmdWord != gstate.cmdmem[cmdWord >> 24])
				goto bail;
			break;

		default:
			// All other commands might need a flush or something, stop this inner loop.
			goto bail;
		}
		cmdCount++;
		src++;
	}

bail:
	// Write back the vertex type that was tracked locally while commands were consumed inline.
	gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType;
	// Skip over the commands we just read out manually.
	if (cmdCount > 0) {
		UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
		currentList->pc += cmdCount * 4;
		// flush back cull mode
		if (cullMode != gstate.getCullMode()) {
			// We rewrote everything to the old cull mode, so flush first.
			drawEngineCommon_->DispatchFlush();

			// Now update things for next time.
			gstate.cmdmem[GE_CMD_CULL] ^= 1;
			gstate_c.Dirty(DIRTY_RASTER_STATE);
		}
	}

	gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount;
	cyclesExecuted += vertexCost_ * totalVertCount;
}
// Handles GE_CMD_BEZIER: submits a bezier patch read from the current vertex/index pointers.
// The low byte of op is the control point count along U, the next byte the count along V.
void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
	// Regular draws prescale and no longer dirty this on normal changes, but curves still need it.
	gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// Binding the render target first makes skipped draws exit before any real work is done.
	VirtualFramebuffer *target = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(target);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	const void *ctrlPoints = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// The index pointer stays null for non-indexed vertex types.
	const void *indexData = nullptr;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Any pending flush has to happen before gstate_c.submitType is switched away from DRAW below.
	if (flushOnParams_)
		drawEngineCommon_->DispatchFlush();

	Spline::BezierSurface patch;
	patch.tess_u = gstate.getPatchDivisionU();
	patch.tess_v = gstate.getPatchDivisionV();
	patch.num_points_u = op & 0xFF;
	patch.num_points_v = (op >> 8) & 0xFF;
	patch.num_patches_u = (patch.num_points_u - 1) / 3;
	patch.num_patches_v = (patch.num_points_v - 1) / 3;
	patch.primType = gstate.getPatchPrimitiveType();
	patch.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_BEZIER, PatchPrimToPrim(patch.primType));

	// The same state is dirtied before and after the submit, around the submitType switch.
	const auto curveDirty = DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE;
	gstate_c.Dirty(curveDirty);

	const bool hwTess = drawEngineCommon_->CanUseHardwareTessellation(patch.primType);
	gstate_c.submitType = hwTess ? SubmitType::HW_BEZIER : SubmitType::BEZIER;
	if (hwTess && gstate_c.spline_num_points_u != patch.num_points_u) {
		gstate_c.Dirty(DIRTY_BEZIERSPLINE);
		gstate_c.spline_num_points_u = patch.num_points_u;
	}

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(ctrlPoints, indexData, patch, gstate.vertType, &bytesRead, "bezier");

	gstate_c.Dirty(curveDirty);
	gstate_c.submitType = SubmitType::DRAW;

	// Advance the vertex/index pointers past the control points, as is done after SubmitPrim.
	const int pointCount = patch.num_points_u * patch.num_points_v;
	AdvanceVerts(gstate.vertType, pointCount, bytesRead);
}
// Handles the spline patch draw command.
//
// op carries the patch layout: the u/v control point counts in bits 0-7 and 8-15, and the
// u/v type fields in bits 16-17 and 18-19. diff is not read in this body.
//
// Returns early without submitting anything (and without advancing the vertex pointers) when
// the draw is being skipped or when the vertex/index address is invalid.
void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
	// Regular draws prescale, so normal changes no longer dirty this - splines/bezier still need it.
	gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// Doing this up front also makes skipping the draw very cheap.
	VirtualFramebuffer *renderTarget = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(renderTarget);

	// Validate the vertex address before taking an unchecked pointer to it.
	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	const void *ctrlPoints = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// The index address is only validated and resolved when the vertex type specifies an index format.
	const void *indexData = nullptr;
	const bool indexed = (gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (indexed) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	// Skinning combined with a patch draw is unexpected; report it but carry on.
	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d",
			gstate.vertType,
			(gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT,
			vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Any flush has to happen now: flushing after gstate_c.submitType is changed below would be a mess.
	if (flushOnParams_) {
		drawEngineCommon_->DispatchFlush();
	}

	// Describe the patch: tessellation levels and facing come from gstate, the layout from op.
	Spline::SplineSurface patch;
	patch.tess_u = gstate.getPatchDivisionU();
	patch.tess_v = gstate.getPatchDivisionV();
	patch.type_u = (op >> 16) & 0x3;
	patch.type_v = (op >> 18) & 0x3;
	patch.num_points_u = op & 0xFF;
	patch.num_points_v = (op >> 8) & 0xFF;
	patch.num_patches_u = patch.num_points_u - 3;
	patch.num_patches_v = patch.num_points_v - 3;
	patch.primType = gstate.getPatchPrimitiveType();
	patch.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_SPLINE, PatchPrimToPrim(patch.primType));

	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE);

	// Pick the submit type depending on whether the draw engine can tessellate this prim type in hardware.
	const bool hwTess = drawEngineCommon_->CanUseHardwareTessellation(patch.primType);
	gstate_c.submitType = hwTess ? SubmitType::HW_SPLINE : SubmitType::SPLINE;
	if (hwTess && gstate_c.spline_num_points_u != patch.num_points_u) {
		// The cached u point count changed, so flag DIRTY_BEZIERSPLINE and remember the new value.
		gstate_c.Dirty(DIRTY_BEZIERSPLINE);
		gstate_c.spline_num_points_u = patch.num_points_u;
	}

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(ctrlPoints, indexData, patch, gstate.vertType, &bytesRead, "spline");

	// Dirty the same state again after the submit, then set the submit type back to DRAW.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE);
	gstate_c.submitType = SubmitType::DRAW;

	// The draw is done, so advance the pointers - SubmitPrim is noted as doing the same.
	const int pointCount = patch.num_points_u * patch.num_points_v;
	AdvanceVerts(gstate.vertType, pointCount, bytesRead);
}
// Handles the block transfer start command: calls Flush(), refreshes the cached framebuffer
// format in gstate_c, then runs DoBlockTransfer() with the current skip-draw reason.
// Neither op nor diff is read in this body.
void GPUCommonHW::Execute_BlockTransferStart(u32 op, u32 diff) {
Flush();
PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading.
gstate_c.framebufFormat = gstate.FrameBufFormat();
// This is a block transfer between RAM and VRAM, or vice versa.
// NOTE(review): the comment here used to begin mid-sentence ("and take appropriate action.");
// its first half appears to have been lost, so what check it called for is unclear - confirm.
// Can we skip this on SkipDraw?
DoBlockTransfer(gstate_c.skipDrawReason);
}

View file

@ -19,6 +19,13 @@ public:
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
void Execute_Prim(u32 op, u32 diff);
void Execute_Bezier(u32 op, u32 diff);
void Execute_Spline(u32 op, u32 diff);
void Execute_BlockTransferStart(u32 op, u32 diff);
typedef void (GPUCommonHW::*CmdFunc)(u32 op, u32 diff);
protected:
void UpdateCmdInfo() override;
@ -32,5 +39,7 @@ protected:
void CheckRenderResized() override;
int msaaLevel_ = 0;
};
private:
void CheckDepthUsage(VirtualFramebuffer *vfb);
};