mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #10653 from hrydgard/submitprim-unify
Unify the SubmitPrim function, and some others, between the HW backends
This commit is contained in:
commit
4b784e6035
23 changed files with 296 additions and 1198 deletions
|
@ -668,3 +668,69 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
|
|||
fullhash += DoReliableHash(&uvScale[0], sizeof(uvScale[0]) * numDrawCalls, 0x0123e658);
|
||||
return fullhash;
|
||||
}
|
||||
|
||||
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
DispatchFlush();
|
||||
}
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
if (prim == GE_PRIM_KEEP_PREVIOUS) {
|
||||
prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
|
||||
} else {
|
||||
prevPrim_ = prim;
|
||||
}
|
||||
|
||||
SetupVertexDecoder(vertType);
|
||||
|
||||
*bytesRead = vertexCount * dec_->VertexSize();
|
||||
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertType = vertType;
|
||||
dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
}
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
|
||||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
// Rendertarget == texture?
|
||||
if (!g_Config.bDisableSlowFramebufEffects) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
DispatchFlush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,10 +56,16 @@ public:
|
|||
// Flush is normally non-virtual but here's a virtual way to call it, used by the shared spline code, which is expensive anyway.
|
||||
// Not really sure if these wrappers are worth it...
|
||||
virtual void DispatchFlush() = 0;
|
||||
// Same for SubmitPrim
|
||||
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) = 0;
|
||||
|
||||
// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim
|
||||
// is different. Should probably refactor that.
|
||||
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
|
||||
}
|
||||
|
||||
bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);
|
||||
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
|
||||
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
|
||||
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
|
||||
|
||||
|
@ -73,6 +79,9 @@ public:
|
|||
bool IsCodePtrVertexDecoder(const u8 *ptr) const {
|
||||
return decJitCache_->IsInSpace(ptr);
|
||||
}
|
||||
int GetNumDrawCalls() const {
|
||||
return numDrawCalls;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Hook with an empty default body; virtual so a derived draw engine can override it.
virtual void ClearTrackedVertexArrays() {}
|
||||
|
|
|
@ -257,72 +257,6 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
|
|||
}
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
|
||||
Flush();
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
if (prim == GE_PRIM_KEEP_PREVIOUS) {
|
||||
prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
|
||||
} else {
|
||||
prevPrim_ = prim;
|
||||
}
|
||||
|
||||
SetupVertexDecoder(vertType);
|
||||
|
||||
*bytesRead = vertexCount * dec_->VertexSize();
|
||||
|
||||
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertType = vertType;
|
||||
dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
}
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
|
||||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
// Rendertarget == texture?
|
||||
if (!g_Config.bDisableSlowFramebufEffects) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::MarkUnreliable(VertexArrayInfoD3D11 *vai) {
|
||||
vai->status = VertexArrayInfoD3D11::VAI_UNRELIABLE;
|
||||
if (vai->vbo) {
|
||||
|
|
|
@ -105,8 +105,6 @@ public:
|
|||
DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context);
|
||||
virtual ~DrawEngineD3D11();
|
||||
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
|
||||
|
||||
// Stores the shader manager pointer in shaderManager_ (the pointer is only assigned here).
void SetShaderManager(ShaderManagerD3D11 *shaderManager) {
	this->shaderManager_ = shaderManager;
}
|
||||
|
@ -135,9 +133,6 @@ public:
|
|||
}
|
||||
|
||||
// Virtual entry point that forwards to this backend's Flush().
void DispatchFlush() override { Flush(); }
|
||||
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
|
||||
SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
|
||||
}
|
||||
|
||||
void ClearTrackedVertexArrays() override;
|
||||
|
||||
|
|
|
@ -66,31 +66,6 @@
|
|||
#include "Core/HLE/sceKernelInterrupt.h"
|
||||
#include "Core/HLE/sceGe.h"
|
||||
|
||||
struct D3D11CommandTableEntry {
|
||||
uint8_t cmd;
|
||||
uint8_t flags;
|
||||
uint64_t dirty;
|
||||
GPU_D3D11::CmdFunc func;
|
||||
};
|
||||
|
||||
// This table gets crunched into a faster form by init.
|
||||
// Backend-specific command rows: { command, flags, dirty mask, handler }.
static const D3D11CommandTableEntry commandTable[] = {
	// Changes that dirty the current texture.
	{ GE_CMD_TEXSIZE0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE,         0, &GPUCommon::Execute_TexSize0 },

	// Changing the vertex type requires us to flush.
	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },

	{ GE_CMD_PRIM,       FLAG_EXECUTE,                                    0, &GPU_D3D11::Execute_Prim },
	{ GE_CMD_BEZIER,     FLAG_FLUSHBEFORE | FLAG_EXECUTE,                 0, &GPUCommon::Execute_Bezier },
	{ GE_CMD_SPLINE,     FLAG_FLUSHBEFORE | FLAG_EXECUTE,                 0, &GPUCommon::Execute_Spline },

	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
	{ GE_CMD_LOADCLUT,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE,         0, &GPU_D3D11::Execute_LoadClut },
};
|
||||
|
||||
// Definition of the static per-command info table (256 entries), value-initialized.
GPU_D3D11::CommandInfo GPU_D3D11::cmdInfo_[256]{};
|
||||
|
||||
GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
: GPUCommon(gfxCtx, draw), drawEngine_(draw,
|
||||
(ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE),
|
||||
|
@ -126,45 +101,6 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
|||
ERROR_LOG(G3D, "gstate has drifted out of sync!");
|
||||
}
|
||||
|
||||
memset(cmdInfo_, 0, sizeof(cmdInfo_));
|
||||
|
||||
// Import both the global and local command tables, and check for dupes
|
||||
std::set<u8> dupeCheck;
|
||||
for (size_t i = 0; i < commonCommandTableSize; i++) {
|
||||
const u8 cmd = commonCommandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commonCommandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
|
||||
const u8 cmd = commandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
|
||||
// Find commands missing from the table.
|
||||
for (int i = 0; i < 0xEF; i++) {
|
||||
if (dupeCheck.find((u8)i) == dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
|
||||
}
|
||||
}
|
||||
|
||||
// No need to flush before the tex scale/offset commands if we are baking
|
||||
// the tex scale/offset into the vertices anyway.
|
||||
UpdateCmdInfo();
|
||||
|
@ -187,18 +123,6 @@ GPU_D3D11::~GPU_D3D11() {
|
|||
stockD3D11.Destroy();
|
||||
}
|
||||
|
||||
void GPU_D3D11::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
|
||||
}
|
||||
|
||||
CheckGPUFeatures();
|
||||
}
|
||||
|
||||
void GPU_D3D11::CheckGPUFeatures() {
|
||||
u32 features = 0;
|
||||
|
||||
|
@ -330,26 +254,6 @@ void GPU_D3D11::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat f
|
|||
framebufferManagerD3D11_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
bool GPU_D3D11::FramebufferDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->dirtyAfterDisplay;
|
||||
vfb->dirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPU_D3D11::FramebufferReallyDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->reallyDirtyAfterDisplay;
|
||||
vfb->reallyDirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPU_D3D11::CopyDisplayToOutput() {
|
||||
float blendColor[4]{};
|
||||
context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], blendColor, 0xFFFFFFFF);
|
||||
|
@ -365,44 +269,6 @@ void GPU_D3D11::CopyDisplayToOutput() {
|
|||
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
|
||||
}
|
||||
|
||||
// Maybe should write this in ASM...
|
||||
// Executes display list commands starting at list.pc until downcount runs out.
//
// For each 32-bit op, the top byte selects the command and its CommandInfo entry.
// Ops equal to the stored register value only run their handler when FLAG_EXECUTE is set.
// Changed ops optionally flush, update gstate.cmdmem, and then either run the handler or
// mark the entry's dirty bits (stored in the flags above bit 8).
// Handlers may modify downcount, so it is written back before and re-read after each call.
void GPU_D3D11::FastRunLoop(DisplayList &list) {
	PROFILE_THIS_SCOPE("gpuloop");
	const CommandInfo *table = cmdInfo_;
	int remaining = downcount;
	while (remaining > 0) {
		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
		const u32 op = *(const u32 *)(Memory::base + list.pc);
		const u32 cmd = op >> 24;
		const CommandInfo &entry = table[cmd];
		const u32 diff = op ^ gstate.cmdmem[cmd];
		const uint64_t flags = entry.flags;

		bool runHandler;
		if (diff != 0) {
			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
				drawEngine_.Flush();
			}
			gstate.cmdmem[cmd] = op;
			runHandler = (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) != 0;
			if (!runHandler) {
				const uint64_t dirtyBits = flags >> 8;
				if (dirtyBits)
					gstate_c.Dirty(dirtyBits);
			}
		} else {
			runHandler = (flags & FLAG_EXECUTE) != 0;
		}

		if (runHandler) {
			downcount = remaining;
			(this->*entry.func)(op, diff);
			remaining = downcount;
		}

		list.pc += 4;
		--remaining;
	}
	downcount = 0;
}
|
||||
|
||||
void GPU_D3D11::FinishDeferred() {
|
||||
// This finishes reading any vertex data that is pending.
|
||||
drawEngine_.FinishDeferred();
|
||||
|
@ -435,85 +301,6 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
// Handler for GE_CMD_PRIM: validates the draw, accounts its cycle cost and hands the
// vertices to the draw engine.
//
// All drawing is driven from here. Other state is only buffered and applied when it is
// time to draw; since most PSP games set state redundantly all the time, that is a huge
// optimization.
//
// op:   full command word; the low 16 bits are the vertex count, the next 3 bits the primitive type.
// diff: unused here.
void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
	const u32 payload = op & 0xFFFFFF;
	const u32 count = payload & 0xFFFF;
	if (count == 0)
		return;

	// Upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((payload >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Line strips: discard AA lines in DOA.
		// Skinned line lists: discard AA lines in Summon Night 5.
		if (prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled()))
			return;
	}

	// This also make skipping drawing very effective.
	framebufferManagerD3D11_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		return;
	}

	const u32 vertexAddr = gstate_c.vertexAddr;
	if (!Memory::IsValidAddress(vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
		return;
	}
	void *verts = Memory::GetPointerUnchecked(vertexAddr);

	// Index data is only looked up when the vertex type selects an index format.
	void *inds = nullptr;
	const u32 vertexType = gstate.vertType;
	const bool indexed = (vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (indexed) {
		const u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// The cached per-vertex cost is only re-estimated while vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto batchCost = vertexCost_ * count;
	gpuStats.vertexGPUCycles += batchCost;
	cyclesExecuted += batchCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);
}
|
||||
|
||||
// Handler for GE_CMD_LOADCLUT: marks texture params dirty, then asks the texture cache
// to load the CLUT from the address and byte count currently held in gstate.
void GPU_D3D11::Execute_LoadClut(u32 op, u32 diff) {
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);

	const auto clutAddress = gstate.getClutAddress();
	const auto clutBytes = gstate.getClutLoadBytes();
	textureCacheD3D11_->LoadClut(clutAddress, clutBytes);
	// This could be used to "dirty" textures with clut.
}
|
||||
|
||||
void GPU_D3D11::GetStats(char *buffer, size_t bufsize) {
|
||||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
snprintf(buffer, bufsize - 1,
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
|
||||
~GPU_D3D11();
|
||||
|
||||
void CheckGPUFeatures();
|
||||
void CheckGPUFeatures() override;
|
||||
void PreExecuteOp(u32 op, u32 diff) override;
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
|
@ -50,23 +50,12 @@ public:
|
|||
void DoState(PointerWrap &p) override;
|
||||
|
||||
void ClearShaderCache() override;
|
||||
bool FramebufferDirty() override;
|
||||
bool FramebufferReallyDirty() override;
|
||||
|
||||
// Copies the stored reporting strings into the two output parameters.
void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
	fullInfo = reportingFullInfo_;
	primaryInfo = reportingPrimaryInfo_;
}
|
||||
|
||||
typedef void (GPU_D3D11::*CmdFunc)(u32 op, u32 diff);
|
||||
struct CommandInfo {
|
||||
uint64_t flags;
|
||||
GPU_D3D11::CmdFunc func;
|
||||
};
|
||||
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
|
||||
|
@ -75,12 +64,9 @@ public:
|
|||
void EndHostFrame() override;
|
||||
|
||||
protected:
|
||||
void FastRunLoop(DisplayList &list) override;
|
||||
void FinishDeferred() override;
|
||||
|
||||
private:
|
||||
void UpdateCmdInfo();
|
||||
|
||||
void Flush() {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
|
@ -101,8 +87,6 @@ private:
|
|||
DrawEngineD3D11 drawEngine_;
|
||||
ShaderManagerD3D11 *shaderManagerD3D11_;
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
int lastVsync_;
|
||||
int vertexCost_ = 0;
|
||||
|
||||
|
|
|
@ -232,70 +232,6 @@ IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader
|
|||
}
|
||||
}
|
||||
|
||||
void DrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
|
||||
Flush();
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
if (prim == GE_PRIM_KEEP_PREVIOUS) {
|
||||
prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
|
||||
} else {
|
||||
prevPrim_ = prim;
|
||||
}
|
||||
|
||||
SetupVertexDecoder(vertType);
|
||||
|
||||
*bytesRead = vertexCount * dec_->VertexSize();
|
||||
|
||||
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertType = vertType;
|
||||
dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
|
||||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
// Rendertarget == texture?
|
||||
if (!g_Config.bDisableSlowFramebufEffects) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
|
||||
vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE;
|
||||
if (vai->vbo) {
|
||||
|
|
|
@ -103,8 +103,6 @@ public:
|
|||
DrawEngineDX9(Draw::DrawContext *draw);
|
||||
virtual ~DrawEngineDX9();
|
||||
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
|
||||
|
||||
// Stores the shader manager pointer in shaderManager_ (the pointer is only assigned here).
void SetShaderManager(ShaderManagerDX9 *shaderManager) {
	this->shaderManager_ = shaderManager;
}
|
||||
|
@ -134,9 +132,6 @@ public:
|
|||
}
|
||||
|
||||
// Virtual entry point that forwards to this backend's Flush().
void DispatchFlush() override { Flush(); }
|
||||
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
|
||||
SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
|
||||
}
|
||||
|
||||
private:
|
||||
void DoFlush();
|
||||
|
|
|
@ -50,31 +50,6 @@
|
|||
|
||||
namespace DX9 {
|
||||
|
||||
struct D3D9CommandTableEntry {
|
||||
uint8_t cmd;
|
||||
uint8_t flags;
|
||||
uint64_t dirty;
|
||||
GPU_DX9::CmdFunc func;
|
||||
};
|
||||
|
||||
// This table gets crunched into a faster form by init.
|
||||
// Backend-specific command rows: { command, flags, dirty mask, handler }.
static const D3D9CommandTableEntry commandTable[] = {
	// Changes that dirty the current texture.
	{ GE_CMD_TEXSIZE0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE,         0, &GPUCommon::Execute_TexSize0 },

	// Changing the vertex type requires us to flush.
	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },

	{ GE_CMD_PRIM,       FLAG_EXECUTE,                                    0, &GPU_DX9::Execute_Prim },
	{ GE_CMD_BEZIER,     FLAG_FLUSHBEFORE | FLAG_EXECUTE,                 0, &GPUCommon::Execute_Bezier },
	{ GE_CMD_SPLINE,     FLAG_FLUSHBEFORE | FLAG_EXECUTE,                 0, &GPUCommon::Execute_Spline },

	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
	{ GE_CMD_LOADCLUT,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE,         0, &GPU_DX9::Execute_LoadClut },
};
|
||||
|
||||
// Definition of the static per-command info table (256 entries).
GPU_DX9::CommandInfo GPU_DX9::cmdInfo_[256];
|
||||
|
||||
GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
: GPUCommon(gfxCtx, draw),
|
||||
depalShaderCache_(draw),
|
||||
|
@ -108,44 +83,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
|||
ERROR_LOG(G3D, "gstate has drifted out of sync!");
|
||||
}
|
||||
|
||||
memset(cmdInfo_, 0, sizeof(cmdInfo_));
|
||||
|
||||
// Import both the global and local command tables, and check for dupes
|
||||
std::set<u8> dupeCheck;
|
||||
for (size_t i = 0; i < commonCommandTableSize; i++) {
|
||||
const u8 cmd = commonCommandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commonCommandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
|
||||
const u8 cmd = commandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
// Find commands missing from the table.
|
||||
for (int i = 0; i < 0xEF; i++) {
|
||||
if (dupeCheck.find((u8)i) == dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
|
||||
}
|
||||
}
|
||||
|
||||
// No need to flush before the tex scale/offset commands if we are baking
|
||||
// the tex scale/offset into the vertices anyway.
|
||||
UpdateCmdInfo();
|
||||
|
@ -166,18 +103,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
|||
}
|
||||
}
|
||||
|
||||
void GPU_DX9::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
|
||||
}
|
||||
|
||||
CheckGPUFeatures();
|
||||
}
|
||||
|
||||
void GPU_DX9::CheckGPUFeatures() {
|
||||
u32 features = 0;
|
||||
|
||||
|
@ -305,26 +230,6 @@ void GPU_DX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for
|
|||
framebufferManagerDX9_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
bool GPU_DX9::FramebufferDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->dirtyAfterDisplay;
|
||||
vfb->dirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPU_DX9::FramebufferReallyDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->reallyDirtyAfterDisplay;
|
||||
vfb->reallyDirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPU_DX9::CopyDisplayToOutput() {
|
||||
dxstate.depthWrite.set(true);
|
||||
dxstate.colorMask.set(true, true, true, true);
|
||||
|
@ -340,44 +245,6 @@ void GPU_DX9::CopyDisplayToOutput() {
|
|||
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
|
||||
}
|
||||
|
||||
// Maybe should write this in ASM...
|
||||
// Executes display list commands starting at list.pc until downcount runs out.
//
// For each 32-bit op, the top byte selects the command and its CommandInfo entry.
// Ops equal to the stored register value only run their handler when FLAG_EXECUTE is set.
// Changed ops optionally flush, update gstate.cmdmem, and then either run the handler or
// mark the entry's dirty bits (stored in the flags above bit 8).
// Handlers may modify downcount, so it is written back before and re-read after each call.
void GPU_DX9::FastRunLoop(DisplayList &list) {
	PROFILE_THIS_SCOPE("gpuloop");
	const CommandInfo *table = cmdInfo_;
	int remaining = downcount;
	while (remaining > 0) {
		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
		const u32 op = *(const u32 *)(Memory::base + list.pc);
		const u32 cmd = op >> 24;
		const CommandInfo &entry = table[cmd];
		const u32 diff = op ^ gstate.cmdmem[cmd];
		const uint64_t flags = entry.flags;

		bool runHandler;
		if (diff != 0) {
			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
				drawEngine_.Flush();
			}
			gstate.cmdmem[cmd] = op;
			runHandler = (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) != 0;
			if (!runHandler) {
				const uint64_t dirtyBits = flags >> 8;
				if (dirtyBits)
					gstate_c.Dirty(dirtyBits);
			}
		} else {
			runHandler = (flags & FLAG_EXECUTE) != 0;
		}

		if (runHandler) {
			downcount = remaining;
			(this->*entry.func)(op, diff);
			remaining = downcount;
		}

		list.pc += 4;
		--remaining;
	}
	downcount = 0;
}
|
||||
|
||||
void GPU_DX9::FinishDeferred() {
|
||||
// This finishes reading any vertex data that is pending.
|
||||
drawEngine_.FinishDeferred();
|
||||
|
@ -410,84 +277,6 @@ void GPU_DX9::ExecuteOp(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
// Handler for GE_CMD_PRIM: validates the draw, accounts its cycle cost and hands the
// vertices to the draw engine.
//
// All drawing is driven from here. Other state is only buffered and applied when it is
// time to draw; since most PSP games set state redundantly all the time, that is a huge
// optimization.
//
// op:   full command word; the low 16 bits are the vertex count, the next 3 bits the primitive type.
// diff: unused here.
void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
	const u32 payload = op & 0xFFFFFF;
	const u32 count = payload & 0xFFFF;
	if (count == 0)
		return;

	// Upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((payload >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Line strips: discard AA lines in DOA.
		// Skinned line lists: discard AA lines in Summon Night 5.
		if (prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled()))
			return;
	}

	// This also make skipping drawing very effective.
	framebufferManagerDX9_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		return;
	}

	const u32 vertexAddr = gstate_c.vertexAddr;
	if (!Memory::IsValidAddress(vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
		return;
	}
	void *verts = Memory::GetPointerUnchecked(vertexAddr);

	// Index data is only looked up when the vertex type selects an index format.
	void *inds = nullptr;
	const u32 vertexType = gstate.vertType;
	const bool indexed = (vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (indexed) {
		const u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// The cached per-vertex cost is only re-estimated while vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto batchCost = vertexCost_ * count;
	gpuStats.vertexGPUCycles += batchCost;
	cyclesExecuted += batchCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);
}
|
||||
|
||||
// Handler for the LOADCLUT command: marks the texture parameters dirty, then asks
// the D3D9 texture cache to load the CLUT described by the current gstate.
// Neither `op` nor `diff` is consulted.
void GPU_DX9::Execute_LoadClut(u32 op, u32 diff) {
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);

	const auto clutAddress = gstate.getClutAddress();
	const auto clutBytes = gstate.getClutLoadBytes();
	textureCacheDX9_->LoadClut(clutAddress, clutBytes);
	// This could be used to "dirty" textures with clut.
}
|
||||
|
||||
void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
|
||||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
snprintf(buffer, bufsize - 1,
|
||||
|
|
|
@ -37,7 +37,7 @@ public:
|
|||
GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
|
||||
~GPU_DX9();
|
||||
|
||||
void CheckGPUFeatures();
|
||||
void CheckGPUFeatures() override;
|
||||
void PreExecuteOp(u32 op, u32 diff) override;
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
|
@ -51,24 +51,12 @@ public:
|
|||
void DoState(PointerWrap &p) override;
|
||||
|
||||
void ClearShaderCache() override;
|
||||
bool FramebufferDirty() override;
|
||||
bool FramebufferReallyDirty() override;
|
||||
|
||||
// Copies the stored reporting strings into the caller-supplied outputs
// (primary first, then full).
void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
	primaryInfo = reportingPrimaryInfo_;
	fullInfo = reportingFullInfo_;
}
|
||||
|
||||
typedef void (GPU_DX9::*CmdFunc)(u32 op, u32 diff);
|
||||
struct CommandInfo {
|
||||
uint64_t flags;
|
||||
GPU_DX9::CmdFunc func;
|
||||
};
|
||||
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_TexSize0(u32 op, u32 diff);
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
|
||||
|
@ -76,12 +64,9 @@ public:
|
|||
void BeginHostFrame() override;
|
||||
|
||||
protected:
|
||||
void FastRunLoop(DisplayList &list) override;
|
||||
void FinishDeferred() override;
|
||||
|
||||
private:
|
||||
void UpdateCmdInfo();
|
||||
|
||||
// Forwards to the draw engine's Flush().
void Flush() { drawEngine_.Flush(); }
|
||||
|
@ -102,8 +87,6 @@ private:
|
|||
DrawEngineDX9 drawEngine_;
|
||||
ShaderManagerDX9 *shaderManagerDX9_;
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
int lastVsync_;
|
||||
int vertexCost_ = 0;
|
||||
|
||||
|
|
|
@ -276,70 +276,6 @@ GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const
|
|||
return inputLayout;
|
||||
}
|
||||
|
||||
void DrawEngineGLES::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
|
||||
Flush();
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
if (prim == GE_PRIM_KEEP_PREVIOUS) {
|
||||
prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
|
||||
} else {
|
||||
prevPrim_ = prim;
|
||||
}
|
||||
|
||||
SetupVertexDecoder(vertType);
|
||||
|
||||
*bytesRead = vertexCount * dec_->VertexSize();
|
||||
|
||||
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertType = vertType;
|
||||
dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
|
||||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
// Rendertarget == texture?
|
||||
if (!g_Config.bDisableSlowFramebufEffects) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf) {
|
||||
u8 *dest = decoded;
|
||||
|
||||
|
|
|
@ -105,8 +105,6 @@ public:
|
|||
DrawEngineGLES(Draw::DrawContext *draw);
|
||||
virtual ~DrawEngineGLES();
|
||||
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
|
||||
|
||||
// Stores the shader manager pointer for later use; nothing else happens here.
void SetShaderManager(ShaderManagerGLES *shaderManager) { shaderManager_ = shaderManager; }
|
||||
|
@ -146,9 +144,6 @@ public:
|
|||
bool IsCodePtrVertexDecoder(const u8 *ptr) const;
|
||||
|
||||
// Virtual entry point that forwards to this engine's Flush().
void DispatchFlush() override {
	Flush();
}
|
||||
// Virtual entry point that forwards every argument unchanged to SubmitPrim().
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
	SubmitPrim(
		verts, inds,
		prim, vertexCount,
		vertType, bytesRead);
}
|
||||
|
||||
GLPushBuffer *GetPushVertexBuffer() {
|
||||
return frameData_[render_->GetCurFrame()].pushVertex;
|
||||
|
|
|
@ -51,32 +51,6 @@
|
|||
#include "Windows/GPU/WindowsGLContext.h"
|
||||
#endif
|
||||
|
||||
struct GLESCommandTableEntry {
|
||||
uint8_t cmd;
|
||||
uint8_t flags;
|
||||
uint64_t dirty;
|
||||
GPU_GLES::CmdFunc func;
|
||||
};
|
||||
|
||||
// This table gets crunched into a faster form by init.
|
||||
// TODO: Share this table between the backends. Will have to make another indirection for the function pointers though..
|
||||
static const GLESCommandTableEntry commandTable[] = {
	// Changes that dirty the current texture.
	{
		GE_CMD_TEXSIZE0,
		FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE,
		0,
		&GPUCommon::Execute_TexSize0,
	},

	// Changing the vertex type requires us to flush.
	{
		GE_CMD_VERTEXTYPE,
		FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE,
		0,
		&GPUCommon::Execute_VertexType,
	},

	// Drawing commands.
	{
		GE_CMD_PRIM,
		FLAG_EXECUTE,
		0,
		&GPU_GLES::Execute_Prim,
	},
	{
		GE_CMD_BEZIER,
		FLAG_FLUSHBEFORE | FLAG_EXECUTE,
		0,
		&GPUCommon::Execute_Bezier,
	},
	{
		GE_CMD_SPLINE,
		FLAG_FLUSHBEFORE | FLAG_EXECUTE,
		0,
		&GPUCommon::Execute_Spline,
	},

	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
	{
		GE_CMD_LOADCLUT,
		FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE,
		0,
		&GPU_GLES::Execute_LoadClut,
	},
};
|
||||
|
||||
GPU_GLES::CommandInfo GPU_GLES::cmdInfo_[256];
|
||||
|
||||
GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
: GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw), depalShaderCache_(draw) {
|
||||
UpdateVsyncInterval(true);
|
||||
|
@ -112,44 +86,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
|||
ERROR_LOG(G3D, "gstate has drifted out of sync!");
|
||||
}
|
||||
|
||||
memset(cmdInfo_, 0, sizeof(cmdInfo_));
|
||||
|
||||
// Import both the global and local command tables, and check for dupes
|
||||
std::set<u8> dupeCheck;
|
||||
for (size_t i = 0; i < commonCommandTableSize; i++) {
|
||||
const u8 cmd = commonCommandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commonCommandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
|
||||
const u8 cmd = commandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
// Find commands missing from the table.
|
||||
for (int i = 0; i < 0xEF; i++) {
|
||||
if (dupeCheck.find((u8)i) == dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
|
||||
}
|
||||
}
|
||||
|
||||
// No need to flush before the tex scale/offset commands if we are baking
|
||||
// the tex scale/offset into the vertices anyway.
|
||||
|
||||
|
@ -467,16 +403,6 @@ inline void GPU_GLES::UpdateVsyncInterval(bool force) {
|
|||
#endif
|
||||
}
|
||||
|
||||
void GPU_GLES::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing backend-specific to do here: defers entirely to the GPUCommon implementation.
void GPU_GLES::ReapplyGfxState() { GPUCommon::ReapplyGfxState(); }
|
||||
|
@ -510,26 +436,6 @@ void GPU_GLES::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat fo
|
|||
framebufferManagerGL_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
bool GPU_GLES::FramebufferDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManagerGL_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->dirtyAfterDisplay;
|
||||
vfb->dirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPU_GLES::FramebufferReallyDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManagerGL_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->reallyDirtyAfterDisplay;
|
||||
vfb->reallyDirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPU_GLES::CopyDisplayToOutput() {
|
||||
// Flush anything left over.
|
||||
framebufferManagerGL_->RebindFramebuffer();
|
||||
|
@ -551,44 +457,6 @@ void GPU_GLES::CopyDisplayToOutput() {
|
|||
#endif
|
||||
}
|
||||
|
||||
// Maybe should write this in ASM...
|
||||
void GPU_GLES::FastRunLoop(DisplayList &list) {
|
||||
PROFILE_THIS_SCOPE("gpuloop");
|
||||
const CommandInfo *cmdInfo = cmdInfo_;
|
||||
int dc = downcount;
|
||||
for (; dc > 0; --dc) {
|
||||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const CommandInfo &info = cmdInfo[cmd];
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
if (diff == 0) {
|
||||
if (info.flags & FLAG_EXECUTE) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
}
|
||||
} else {
|
||||
uint64_t flags = info.flags;
|
||||
if (flags & FLAG_FLUSHBEFOREONCHANGE) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op;
|
||||
if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else {
|
||||
uint64_t dirty = flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
}
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
downcount = 0;
|
||||
}
|
||||
|
||||
void GPU_GLES::FinishDeferred() {
|
||||
// This finishes reading any vertex data that is pending.
|
||||
drawEngine_.FinishDeferred();
|
||||
|
@ -621,81 +489,6 @@ void GPU_GLES::ExecuteOp(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
// Handler for the PRIM command. The low 16 bits of `op` are the vertex count and
// bits 16..18 the primitive type. After the early-outs below, the vertex (and
// optional index) pointers are resolved, cycle counters are charged, and the draw
// is handed to the draw engine; finally the vertex/index address is advanced.
//
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
	const u32 count = op & 0xFFFF;
	if (count == 0) {
		return;
	}

	// Upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	//  - line strips: discard AA lines in DOA
	//  - skinned lines: discard AA lines in Summon Night 5
	if (gstate.isAntiAliasEnabled()
		&& (prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled()))) {
		return;
	}

	// This also makes skipping drawing very effective. This function can change the framebuffer.
	framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// Indices are only looked up when the vertex type asks for an indexed draw.
	void *inds = nullptr;
	const bool indexed = (gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (indexed) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// Only re-estimate the per-vertex cost when vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto drawCost = vertexCost_ * count;
	gpuStats.vertexGPUCycles += drawCost;
	cyclesExecuted += drawCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(gstate.vertType, count, bytesRead);
}
|
||||
|
||||
// Handler for the LOADCLUT command: marks the texture parameters dirty, then asks
// the GL texture cache to load the CLUT described by the current gstate.
// Neither `op` nor `diff` is consulted.
void GPU_GLES::Execute_LoadClut(u32 op, u32 diff) {
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);

	const auto clutAddress = gstate.getClutAddress();
	const auto clutBytes = gstate.getClutLoadBytes();
	textureCacheGL_->LoadClut(clutAddress, clutBytes);
}
|
||||
|
||||
void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
|
||||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
snprintf(buffer, bufsize - 1,
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
~GPU_GLES();
|
||||
|
||||
// This gets called on startup and when we get back from settings.
|
||||
void CheckGPUFeatures();
|
||||
void CheckGPUFeatures() override;
|
||||
|
||||
bool IsReady() override;
|
||||
|
||||
|
@ -55,23 +55,12 @@ public:
|
|||
|
||||
void ClearShaderCache() override;
|
||||
void CleanupBeforeUI() override;
|
||||
bool FramebufferDirty() override;
|
||||
bool FramebufferReallyDirty() override;
|
||||
|
||||
// Copies the stored reporting strings into the caller-supplied outputs
// (primary first, then full).
void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
	primaryInfo = reportingPrimaryInfo_;
	fullInfo = reportingFullInfo_;
}
|
||||
|
||||
typedef void (GPU_GLES::*CmdFunc)(u32 op, u32 diff);
|
||||
struct CommandInfo {
|
||||
uint64_t flags;
|
||||
GPU_GLES::CmdFunc func;
|
||||
};
|
||||
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
|
||||
|
@ -80,7 +69,6 @@ public:
|
|||
void EndHostFrame() override;
|
||||
|
||||
protected:
|
||||
void FastRunLoop(DisplayList &list) override;
|
||||
void FinishDeferred() override;
|
||||
|
||||
private:
|
||||
|
@ -96,9 +84,6 @@ private:
|
|||
void Reinitialize() override;
|
||||
|
||||
inline void UpdateVsyncInterval(bool force);
|
||||
void UpdateCmdInfo();
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
FramebufferManagerGLES *framebufferManagerGL_;
|
||||
TextureCacheGLES *textureCacheGL_;
|
||||
|
|
|
@ -110,6 +110,6 @@ bool GPU_Init(GraphicsContext *ctx, Draw::DrawContext *draw) {
|
|||
|
||||
// Destroys the active GPU object and clears both global handles to it.
void GPU_Shutdown() {
	delete gpu;
	gpu = nullptr;
	gpuDebug = nullptr;
}
|
||||
|
|
|
@ -42,6 +42,15 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
|||
{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
|
||||
{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to?
|
||||
|
||||
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommon::Execute_Prim },
|
||||
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
|
||||
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
|
||||
|
||||
// Changing the vertex type requires us to flush.
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
|
||||
|
||||
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_LoadClut },
|
||||
|
||||
// These two are actually processed in CMD_END. Not sure if FLAG_FLUSHBEFORE matters.
|
||||
{ GE_CMD_SIGNAL, FLAG_FLUSHBEFORE },
|
||||
{ GE_CMD_FINISH, FLAG_FLUSHBEFORE },
|
||||
|
@ -121,7 +130,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
|||
{ GE_CMD_TEXOFFSETU },
|
||||
{ GE_CMD_TEXOFFSETV },
|
||||
|
||||
// TEXSIZE0 is handled by each backend.
|
||||
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
|
||||
{ GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
|
||||
{ GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
|
||||
{ GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
|
||||
|
@ -343,6 +352,9 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
|||
};
|
||||
size_t commonCommandTableSize = ARRAY_SIZE(commonCommandTable);
|
||||
|
||||
// TODO: Make class member?
|
||||
GPUCommon::CommandInfo GPUCommon::cmdInfo_[256];
|
||||
|
||||
// Asks the shared draw engine to flush through its virtual dispatch hook.
void GPUCommon::Flush() { drawEngineCommon_->DispatchFlush(); }
|
||||
|
@ -366,11 +378,47 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
|
|||
gstate.Reset();
|
||||
gstate_c.Reset();
|
||||
gpuStats.Reset();
|
||||
|
||||
memset(cmdInfo_, 0, sizeof(cmdInfo_));
|
||||
|
||||
// Import both the global and local command tables, and check for dupes
|
||||
std::set<u8> dupeCheck;
|
||||
for (size_t i = 0; i < commonCommandTableSize; i++) {
|
||||
const u8 cmd = commonCommandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commonCommandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
// Find commands missing from the table.
|
||||
for (int i = 0; i < 0xEF; i++) {
|
||||
if (dupeCheck.find((u8)i) == dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
|
||||
}
|
||||
}
|
||||
|
||||
UpdateCmdInfo();
|
||||
}
|
||||
|
||||
// Nothing to release explicitly here.
GPUCommon::~GPUCommon() = default;
|
||||
|
||||
void GPUCommon::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
|
||||
}
|
||||
}
|
||||
|
||||
void GPUCommon::BeginHostFrame() {
|
||||
ReapplyGfxState();
|
||||
|
||||
|
@ -917,6 +965,46 @@ bool GPUCommon::InterpretList(DisplayList &list) {
|
|||
return gpuState == GPUSTATE_DONE || gpuState == GPUSTATE_ERROR;
|
||||
}
|
||||
|
||||
// Maybe should write this in ASM...
|
||||
void GPUCommon::FastRunLoop(DisplayList &list) {
|
||||
PROFILE_THIS_SCOPE("gpuloop");
|
||||
const CommandInfo *cmdInfo = cmdInfo_;
|
||||
int dc = downcount;
|
||||
for (; dc > 0; --dc) {
|
||||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const CommandInfo &info = cmdInfo[cmd];
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
if (diff == 0) {
|
||||
if (info.flags & FLAG_EXECUTE) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
}
|
||||
} else {
|
||||
uint64_t flags = info.flags;
|
||||
if (flags & FLAG_FLUSHBEFOREONCHANGE) {
|
||||
if (drawEngineCommon_->GetNumDrawCalls()) {
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
}
|
||||
}
|
||||
gstate.cmdmem[cmd] = op;
|
||||
if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else {
|
||||
uint64_t dirty = flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
}
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
downcount = 0;
|
||||
}
|
||||
|
||||
void GPUCommon::BeginFrame() {
|
||||
immCount_ = 0;
|
||||
if (dumpNextFrame_) {
|
||||
|
@ -1347,6 +1435,11 @@ void GPUCommon::Execute_VertexType(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
// Handler for the LOADCLUT command: marks the texture parameters dirty, then asks
// the texture cache to load the CLUT described by the current gstate.
// Neither `op` nor `diff` is consulted.
void GPUCommon::Execute_LoadClut(u32 op, u32 diff) {
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);

	const auto clutAddress = gstate.getClutAddress();
	const auto clutBytes = gstate.getClutLoadBytes();
	textureCache_->LoadClut(clutAddress, clutBytes);
}
|
||||
|
||||
void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
|
||||
// Don't flush when weight count changes, unless morph is enabled.
|
||||
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
|
@ -1368,6 +1461,81 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
|
|||
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
|
||||
|
||||
// Handler for the PRIM command. The low 16 bits of `op` are the vertex count and
// bits 16..18 the primitive type. After the early-outs below, the vertex (and
// optional index) pointers are resolved, cycle counters are charged, and the draw
// is handed to the draw engine; finally the vertex/index address is advanced.
//
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
void GPUCommon::Execute_Prim(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("execprim");

	const u32 count = op & 0xFFFF;
	if (count == 0) {
		return;
	}

	// Upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	//  - line strips: discard AA lines in DOA
	//  - skinned lines: discard AA lines in Summon Night 5
	if (gstate.isAntiAliasEnabled()
		&& (prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled()))) {
		return;
	}

	// This also makes skipping drawing very effective.
	framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		drawEngineCommon_->SetupVertexDecoder(gstate.vertType);  // Do we still need to do this?
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// Indices are only looked up when the vertex type asks for an indexed draw.
	void *inds = nullptr;
	const u32 vertexType = gstate.vertType;
	const bool indexed = (vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (indexed) {
		const u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// Only re-estimate the per-vertex cost when vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto drawCost = vertexCost_ * count;
	gpuStats.vertexGPUCycles += drawCost;
	cyclesExecuted += drawCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);
}
|
||||
|
||||
void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
|
||||
|
@ -2432,3 +2600,23 @@ bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool GPUCommon::FramebufferDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->dirtyAfterDisplay;
|
||||
vfb->dirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPUCommon::FramebufferReallyDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->reallyDirtyAfterDisplay;
|
||||
vfb->reallyDirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -70,6 +70,10 @@ public:
|
|||
// Accessor for the stored draw context pointer.
Draw::DrawContext *GetDrawContext() override { return draw_; }
|
||||
virtual void CheckGPUFeatures() = 0;
|
||||
|
||||
void UpdateCmdInfo();
|
||||
|
||||
// This implementation always reports ready.
bool IsReady() override { return true; }
|
||||
|
@ -129,11 +133,14 @@ public:
|
|||
void Execute_VertexType(u32 op, u32 diff);
|
||||
void Execute_VertexTypeSkinning(u32 op, u32 diff);
|
||||
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_Bezier(u32 op, u32 diff);
|
||||
void Execute_Spline(u32 op, u32 diff);
|
||||
void Execute_BoundingBox(u32 op, u32 diff);
|
||||
void Execute_BlockTransferStart(u32 op, u32 diff);
|
||||
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
|
||||
void Execute_TexSize0(u32 op, u32 diff);
|
||||
void Execute_TexLevel(u32 op, u32 diff);
|
||||
|
||||
|
@ -238,6 +245,9 @@ public:
|
|||
return -1;
|
||||
}
|
||||
|
||||
bool FramebufferDirty() override;
|
||||
bool FramebufferReallyDirty() override;
|
||||
|
||||
typedef void (GPUCommon::*CmdFunc)(u32 op, u32 diff);
|
||||
|
||||
protected:
|
||||
|
@ -258,8 +268,8 @@ protected:
|
|||
|
||||
void BeginFrame() override;
|
||||
|
||||
// To avoid virtual calls to PreExecuteOp().
|
||||
virtual void FastRunLoop(DisplayList &list) = 0;
|
||||
virtual void FastRunLoop(DisplayList &list);
|
||||
|
||||
void SlowRunLoop(DisplayList &list);
|
||||
void UpdatePC(u32 currentPC, u32 newPC);
|
||||
void UpdateState(GPURunState state);
|
||||
|
@ -290,6 +300,13 @@ protected:
|
|||
GraphicsContext *gfxCtx_;
|
||||
Draw::DrawContext *draw_;
|
||||
|
||||
struct CommandInfo {
|
||||
uint64_t flags;
|
||||
GPUCommon::CmdFunc func;
|
||||
};
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
typedef std::list<int> DisplayListQueue;
|
||||
|
||||
int nextListID;
|
||||
|
@ -316,6 +333,8 @@ protected:
|
|||
DrawType lastDraw_;
|
||||
GEPrimitiveType lastPrim_;
|
||||
|
||||
int vertexCost_ = 0;
|
||||
|
||||
// No idea how big this buffer needs to be.
|
||||
enum {
|
||||
MAX_IMMBUFFER_SIZE = 32,
|
||||
|
|
|
@ -26,6 +26,8 @@ class NullGPU : public GPUCommon {
|
|||
public:
|
||||
NullGPU();
|
||||
~NullGPU();
|
||||
|
||||
// Intentionally empty for NullGPU.
void CheckGPUFeatures() override {
}
|
||||
// Intentionally empty for NullGPU.
void InitClear() override {
}
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
|
|
|
@ -52,6 +52,8 @@ class SoftGPU : public GPUCommon {
|
|||
public:
|
||||
SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *_thin3D);
|
||||
~SoftGPU();
|
||||
|
||||
// Intentionally empty for SoftGPU.
void CheckGPUFeatures() override {
}
|
||||
// Intentionally empty for SoftGPU.
void InitClear() override {
}
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
|
|
|
@ -337,72 +337,6 @@ void DrawEngineVulkan::EndFrame() {
|
|||
vertexCache_->End();
|
||||
}
|
||||
|
||||
void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
Flush();
|
||||
}
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
if (prim == GE_PRIM_KEEP_PREVIOUS) {
|
||||
prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
|
||||
} else {
|
||||
prevPrim_ = prim;
|
||||
}
|
||||
|
||||
SetupVertexDecoder(vertType);
|
||||
|
||||
*bytesRead = vertexCount * dec_->VertexSize();
|
||||
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertType = vertType;
|
||||
dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
}
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
|
||||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
// Rendertarget == texture?
|
||||
if (!g_Config.bDisableSlowFramebufEffects) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DrawEngineVulkan::DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
|
||||
u8 *dest = decoded;
|
||||
|
||||
|
|
|
@ -122,8 +122,6 @@ public:
|
|||
DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *draw);
|
||||
virtual ~DrawEngineVulkan();
|
||||
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
|
||||
|
||||
// Stores the given shader manager pointer in shaderManager_.
void SetShaderManager(ShaderManagerVulkan *shaderManager) {
|
||||
shaderManager_ = shaderManager;
|
||||
}
|
||||
|
@ -157,9 +155,6 @@ public:
|
|||
}
|
||||
|
||||
// Override that simply forwards to this class's Flush().
void DispatchFlush() override { Flush(); }
|
||||
// Override that passes every argument unchanged to this class's SubmitPrim().
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
|
||||
SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
|
||||
}
|
||||
|
||||
VkPipelineLayout GetPipelineLayout() const {
|
||||
return pipelineLayout_;
|
||||
|
|
|
@ -46,31 +46,6 @@
|
|||
#include "Core/HLE/sceKernelInterrupt.h"
|
||||
#include "Core/HLE/sceGe.h"
|
||||
|
||||
// One row of the Vulkan-specific command table. Rows are written as aggregate initializers in
// the order { cmd, flags, dirty, func }, so the member order must not change.
struct VulkanCommandTableEntry {
|
||||
uint8_t cmd;  // Command byte; used as the index into cmdInfo_.
|
||||
uint8_t flags;  // FLAG_* bits (flush / execute behavior) for this command.
|
||||
uint64_t dirty;  // Dirty bits; the constructor ORs them into cmdInfo_ flags shifted left by 8.
|
||||
GPU_Vulkan::CmdFunc func;  // Handler; the constructor crashes if an EXECUTE/EXECUTEONCHANGE row has none.
|
||||
};
|
||||
|
||||
GPU_Vulkan::CommandInfo GPU_Vulkan::cmdInfo_[256];
|
||||
|
||||
// Vulkan-specific command table, columns are { cmd, flags, dirty, func }.
// This table gets crunched into a faster form by init: the GPU_Vulkan constructor merges it with
// commonCommandTable into cmdInfo_, logging duplicated and missing command bytes.
|
||||
static const VulkanCommandTableEntry commandTable[] = {
|
||||
// Changes that dirty the current texture.
|
||||
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
|
||||
|
||||
// Changing the vertex type requires us to flush.
// UpdateCmdInfo() adjusts this entry's flush flag and handler based on g_Config.bSoftwareSkinning.
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
|
||||
|
||||
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Prim },
|
||||
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
|
||||
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
|
||||
|
||||
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
|
||||
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_LoadClut },
|
||||
};
|
||||
|
||||
GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
: GPUCommon(gfxCtx, draw),
|
||||
vulkan_((VulkanContext *)gfxCtx->GetAPIContext()),
|
||||
|
@ -111,46 +86,6 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
|||
ERROR_LOG(G3D, "gstate has drifted out of sync!");
|
||||
}
|
||||
|
||||
memset(cmdInfo_, 0, sizeof(cmdInfo_));
|
||||
|
||||
// Import both the global and local command tables, and check for dupes
|
||||
std::set<u8> dupeCheck;
|
||||
for (size_t i = 0; i < commonCommandTableSize; i++) {
|
||||
const u8 cmd = commonCommandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commonCommandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
|
||||
const u8 cmd = commandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
|
||||
cmdInfo_[cmd].func = commandTable[i].func;
|
||||
if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
|
||||
Crash();
|
||||
}
|
||||
}
|
||||
// Find commands missing from the table.
|
||||
for (int i = 0; i < 0xEF; i++) {
|
||||
if (dupeCheck.find((u8)i) == dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
|
||||
}
|
||||
}
|
||||
|
||||
UpdateCmdInfo();
|
||||
|
||||
BuildReportingInfo();
|
||||
// Update again after init to be sure of any silly driver problems.
|
||||
UpdateVsyncInterval(true);
|
||||
|
@ -405,41 +340,11 @@ void GPU_Vulkan::UpdateVsyncInterval(bool force) {
|
|||
// TODO
|
||||
}
|
||||
|
||||
void GPU_Vulkan::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
|
||||
}
|
||||
}
|
||||
|
||||
// Reports the new display framebuffer (address, stride, format) to the host first, then hands
// the same values to the framebuffer manager.
void GPU_Vulkan::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
|
||||
host->GPUNotifyDisplay(framebuf, stride, format);
|
||||
framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
bool GPU_Vulkan::FramebufferDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->dirtyAfterDisplay;
|
||||
vfb->dirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPU_Vulkan::FramebufferReallyDirty() {
|
||||
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
|
||||
if (vfb) {
|
||||
bool dirty = vfb->reallyDirtyAfterDisplay;
|
||||
vfb->reallyDirtyAfterDisplay = false;
|
||||
return dirty;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPU_Vulkan::CopyDisplayToOutput() {
|
||||
// Flush anything left over.
|
||||
drawEngine_.Flush();
|
||||
|
@ -451,44 +356,6 @@ void GPU_Vulkan::CopyDisplayToOutput() {
|
|||
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
|
||||
}
|
||||
|
||||
// Maybe should write this in ASM...
|
||||
// Interprets display list words starting at list.pc until the downcount budget is used up.
// For each word: when it differs from the cached gstate.cmdmem value, optionally flushes, stores
// the new value, and then either runs the command's handler or marks its dirty bits; when it is
// unchanged, only FLAG_EXECUTE commands run their handler. downcount is zero on return.
// Maybe should write this in ASM...
void GPU_Vulkan::FastRunLoop(DisplayList &list) {
	PROFILE_THIS_SCOPE("gpuloop");
	const CommandInfo *table = cmdInfo_;

	int remaining = downcount;
	while (remaining > 0) {
		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
		const u32 op = *(const u32 *)(Memory::base + list.pc);
		const u32 cmd = op >> 24;
		const CommandInfo &info = table[cmd];
		const u32 diff = op ^ gstate.cmdmem[cmd];
		const uint64_t flags = info.flags;

		bool runHandler;
		if (diff != 0) {
			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
				drawEngine_.Flush();
			}
			gstate.cmdmem[cmd] = op;
			runHandler = (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) != 0;
			if (!runHandler) {
				// No handler to run: the dirty bits live above the low 8 flag bits.
				const uint64_t dirty = flags >> 8;
				if (dirty) {
					gstate_c.Dirty(dirty);
				}
			}
		} else {
			runHandler = (flags & FLAG_EXECUTE) != 0;
		}

		if (runHandler) {
			// Publish the local counter around the call and read it back afterwards,
			// since the handler may change downcount.
			downcount = remaining;
			(this->*info.func)(op, diff);
			remaining = downcount;
		}

		list.pc += 4;
		--remaining;
	}
	downcount = 0;
}
|
||||
|
||||
// Forwards to the draw engine's FinishDeferred().
void GPU_Vulkan::FinishDeferred() {
|
||||
drawEngine_.FinishDeferred();
|
||||
}
|
||||
|
@ -520,85 +387,6 @@ void GPU_Vulkan::ExecuteOp(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
// Handler for GE_CMD_PRIM: decodes the primitive type and vertex count from op, validates the
// current vertex/index addresses, accounts for the estimated cost, submits the primitive to the
// draw engine and finally advances the vertex/index address.
//
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("execprim");

	const u32 payload = op & 0xFFFFFF;
	const u32 count = payload & 0xFFFF;
	if (count == 0) {
		return;
	}

	// Only three bits select the primitive type; upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((payload >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Line strips: discard AA lines in DOA.
		// Skinned lines: discard AA lines in Summon Night 5.
		if (prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled())) {
			return;
		}
	}

	// This also makes skipping drawing very effective.
	framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	const bool skipDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipDraw) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);  // Do we still need to do this?
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * count;
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// Index data is only looked up when the vertex type actually uses indices.
	const u32 vertexType = gstate.vertType;
	void *inds = nullptr;
	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		const u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// The cached per-vertex cost is only re-estimated while vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto drawCost = vertexCost_ * count;
	gpuStats.vertexGPUCycles += drawCost;
	cyclesExecuted += drawCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);
}
|
||||
|
||||
// Handler for GE_CMD_LOADCLUT: marks texture params dirty, then asks the Vulkan texture cache to
// load the CLUT from the address and byte count currently held in gstate. op and diff are unused.
void GPU_Vulkan::Execute_LoadClut(u32 op, u32 diff) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
textureCacheVulkan_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
|
||||
}
|
||||
|
||||
void GPU_Vulkan::InitDeviceObjects() {
|
||||
ILOG("GPU_Vulkan::InitDeviceObjects");
|
||||
// Initialize framedata
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
~GPU_Vulkan();
|
||||
|
||||
// This gets called on startup and when we get back from settings.
|
||||
void CheckGPUFeatures();
|
||||
void CheckGPUFeatures() override;
|
||||
|
||||
// These are where we can reset command buffers etc.
|
||||
void BeginHostFrame() override;
|
||||
|
@ -54,23 +54,12 @@ public:
|
|||
void DoState(PointerWrap &p) override;
|
||||
|
||||
void ClearShaderCache() override;
|
||||
bool FramebufferDirty() override;
|
||||
bool FramebufferReallyDirty() override;
|
||||
|
||||
// Copies the stored reporting strings (reportingPrimaryInfo_ / reportingFullInfo_) into the
// two output parameters.
void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
|
||||
primaryInfo = reportingPrimaryInfo_;
|
||||
fullInfo = reportingFullInfo_;
|
||||
}
|
||||
|
||||
typedef void (GPU_Vulkan::*CmdFunc)(u32 op, u32 diff);
|
||||
// Per-command entry of the cmdInfo_ lookup table.
struct CommandInfo {
|
||||
uint64_t flags;  // FLAG_* bits in the low 8 bits, with the command's dirty bits stored shifted left by 8.
|
||||
GPU_Vulkan::CmdFunc func;  // Member-function handler invoked as (this->*func)(op, diff).
|
||||
};
|
||||
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
|
||||
|
@ -80,7 +69,6 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
void FastRunLoop(DisplayList &list) override;
|
||||
void FinishDeferred() override;
|
||||
|
||||
private:
|
||||
|
@ -93,13 +81,10 @@ private:
|
|||
void CopyDisplayToOutput() override;
|
||||
void Reinitialize() override;
|
||||
inline void UpdateVsyncInterval(bool force);
|
||||
void UpdateCmdInfo();
|
||||
|
||||
void InitDeviceObjects();
|
||||
void DestroyDeviceObjects();
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
VulkanContext *vulkan_;
|
||||
FramebufferManagerVulkan *framebufferManagerVulkan_;
|
||||
TextureCacheVulkan *textureCacheVulkan_;
|
||||
|
@ -112,8 +97,6 @@ private:
|
|||
// Manages state and pipeline objects
|
||||
PipelineManagerVulkan *pipelineManager_;
|
||||
|
||||
int vertexCost_ = 0;
|
||||
|
||||
std::string reportingPrimaryInfo_;
|
||||
std::string reportingFullInfo_;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue