mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #5898 from unknownbrackets/gpu-cmd-funcs
Use individual funcs, skipping the ExecuteOpInternal switch
This commit is contained in:
commit
e223ab7df6
4 changed files with 500 additions and 387 deletions
|
@ -55,13 +55,14 @@ enum {
|
|||
// One row of the static commandTable: a GE command number, the flag bits that
// control flushing/execution for it, and an optional per-command handler.
// Rows written with only two initializers leave func null; the GLES_GPU
// constructor substitutes ExecuteOpInternal for those.
struct CommandTableEntry {
|
||||
u8 cmd;  // GE command number (used as the index into cmdInfo_)
|
||||
u8 flags;  // Combination of FLAG_* bits
|
||||
GLES_GPU::CmdFunc func;  // Handler member function, or null when omitted
|
||||
};
|
||||
|
||||
static const CommandTableEntry commandTable[] = {
|
||||
// Changes that dirty the framebuffer
|
||||
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
|
||||
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
|
||||
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
|
||||
{GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE},
|
||||
{GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE},
|
||||
|
||||
|
@ -83,7 +84,7 @@ static const CommandTableEntry commandTable[] = {
|
|||
|
||||
// Changes that dirty the current texture. Really should be possible to avoid executing these if we compile
|
||||
// by adding some more flags.
|
||||
{GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE}, // NOTE: only one that uses diff?
|
||||
{GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GLES_GPU::Execute_TexSize0},
|
||||
{GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
|
@ -92,7 +93,7 @@ static const CommandTableEntry commandTable[] = {
|
|||
{GE_CMD_TEXSIZE6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXSIZE7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_TexAddr0},
|
||||
{GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
|
@ -203,22 +204,22 @@ static const CommandTableEntry commandTable[] = {
|
|||
{GE_CMD_VIEWPORTZ2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
|
||||
// Region
|
||||
{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Region},
|
||||
{GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Region},
|
||||
|
||||
// Scissor
|
||||
{GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
|
||||
// These dirty various vertex shader uniforms. Could embed information about that in this table and call dirtyuniform directly, hm...
|
||||
{GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Ambient},
|
||||
{GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Ambient},
|
||||
{GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialDiffuse},
|
||||
{GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialEmissive},
|
||||
{GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialAmbient},
|
||||
{GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialAmbient},
|
||||
{GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialSpecular},
|
||||
{GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialSpecular},
|
||||
|
||||
// These precompute a value. not sure if worth it. Also dirty uniforms, which could be table-ized to avoid execute.
|
||||
{GE_CMD_LX0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
|
@ -302,20 +303,20 @@ static const CommandTableEntry commandTable[] = {
|
|||
{GE_CMD_TRANSFERSIZE, 0},
|
||||
|
||||
// From Common. No flushing but definitely need execute.
|
||||
{GE_CMD_OFFSETADDR, FLAG_EXECUTE},
|
||||
{GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC}, // Really?
|
||||
{GE_CMD_PRIM, FLAG_EXECUTE},
|
||||
{GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
|
||||
{GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
|
||||
{GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
|
||||
{GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // Flush?
|
||||
{GE_CMD_VADDR, FLAG_EXECUTE},
|
||||
{GE_CMD_IADDR, FLAG_EXECUTE},
|
||||
{GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // EXECUTE
|
||||
{GE_CMD_OFFSETADDR, FLAG_EXECUTE, &GPUCommon::Execute_OffsetAddr},
|
||||
{GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC, &GPUCommon::Execute_Origin}, // Really?
|
||||
{GE_CMD_PRIM, FLAG_EXECUTE, &GLES_GPU::Execute_Prim},
|
||||
{GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_Jump},
|
||||
{GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_Call},
|
||||
{GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_Ret},
|
||||
{GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_End}, // Flush?
|
||||
{GE_CMD_VADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Vaddr},
|
||||
{GE_CMD_IADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Iaddr},
|
||||
{GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_BJump}, // EXECUTE
|
||||
{GE_CMD_BOUNDINGBOX, FLAG_EXECUTE}, // + FLUSHBEFORE when we implement... or not, do we need to?
|
||||
|
||||
// Changing the vertex type requires us to flush.
|
||||
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_VertexType},
|
||||
|
||||
{GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
|
||||
{GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
|
||||
|
@ -325,7 +326,7 @@ static const CommandTableEntry commandTable[] = {
|
|||
{GE_CMD_FINISH, FLAG_FLUSHBEFORE},
|
||||
|
||||
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
|
||||
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE},
|
||||
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GLES_GPU::Execute_LoadClut},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC},
|
||||
|
||||
// We don't use the dither table.
|
||||
|
@ -382,6 +383,8 @@ static const CommandTableEntry commandTable[] = {
|
|||
{GE_CMD_UNKNOWN_FF, FLAG_EXECUTE},
|
||||
};
|
||||
|
||||
GLES_GPU::CommandInfo GLES_GPU::cmdInfo_[256];
|
||||
|
||||
|
||||
GLES_GPU::GLES_GPU()
|
||||
: resized_(false) {
|
||||
|
@ -410,18 +413,21 @@ GLES_GPU::GLES_GPU()
|
|||
ERROR_LOG(G3D, "gstate has drifted out of sync!");
|
||||
}
|
||||
|
||||
// Sanity check commandFlags table - no dupes please
|
||||
// Sanity check cmdInfo_ table - no dupes please
|
||||
std::set<u8> dupeCheck;
|
||||
commandFlags_ = new u8[256];
|
||||
memset(commandFlags_, 0, 256 * sizeof(bool));
|
||||
memset(cmdInfo_, 0, sizeof(cmdInfo_));
|
||||
for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
|
||||
u8 cmd = commandTable[i].cmd;
|
||||
const u8 cmd = commandTable[i].cmd;
|
||||
if (dupeCheck.find(cmd) != dupeCheck.end()) {
|
||||
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
|
||||
} else {
|
||||
dupeCheck.insert(cmd);
|
||||
}
|
||||
commandFlags_[cmd] |= commandTable[i].flags;
|
||||
cmdInfo_[cmd].flags |= commandTable[i].flags;
|
||||
cmdInfo_[cmd].func = commandTable[i].func;
|
||||
if (!cmdInfo_[cmd].func) {
|
||||
cmdInfo_[cmd].func = &GLES_GPU::ExecuteOpInternal;
|
||||
}
|
||||
}
|
||||
// Find commands missing from the table.
|
||||
for (int i = 0; i < 0xEF; i++) {
|
||||
|
@ -434,14 +440,14 @@ GLES_GPU::GLES_GPU()
|
|||
// the tex scale/offset into the vertices anyway.
|
||||
|
||||
if (g_Config.bPrescaleUV) {
|
||||
commandFlags_[GE_CMD_TEXSCALEU] &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
commandFlags_[GE_CMD_TEXSCALEV] &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
commandFlags_[GE_CMD_TEXOFFSETU] &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
commandFlags_[GE_CMD_TEXOFFSETV] &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_TEXSCALEU].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_TEXSCALEV].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_TEXOFFSETU].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_TEXOFFSETV].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
}
|
||||
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
commandFlags_[GE_CMD_VERTEXTYPE] &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
}
|
||||
|
||||
BuildReportingInfo();
|
||||
|
@ -451,7 +457,6 @@ GLES_GPU::~GLES_GPU() {
|
|||
framebufferManager_.DestroyAllFBOs();
|
||||
shaderManager_->ClearCache(true);
|
||||
delete shaderManager_;
|
||||
delete [] commandFlags_;
|
||||
}
|
||||
|
||||
// Let's avoid passing nulls into snprintf().
|
||||
|
@ -615,12 +620,13 @@ void GLES_GPU::CopyDisplayToOutputInternal() {
|
|||
|
||||
// Maybe should write this in ASM...
|
||||
void GLES_GPU::FastRunLoop(DisplayList &list) {
|
||||
const u8 *commandFlags = commandFlags_;
|
||||
const CommandInfo *cmdInfo = cmdInfo_;
|
||||
for (; downcount > 0; --downcount) {
|
||||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const u8 cmdFlags = commandFlags[cmd]; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
|
||||
const CommandInfo info = cmdInfo[cmd];
|
||||
const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
|
||||
if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
|
||||
|
@ -628,7 +634,7 @@ void GLES_GPU::FastRunLoop(DisplayList &list) {
|
|||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
|
||||
ExecuteOpInternal(op, diff);
|
||||
(this->*info.func)(op, diff);
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
|
@ -658,7 +664,7 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
|
|||
}
|
||||
|
||||
inline void GLES_GPU::CheckFlushOp(int cmd, u32 diff) {
|
||||
const u8 cmdFlags = commandFlags_[cmd];
|
||||
const u8 cmdFlags = cmdInfo_[cmd].flags;
|
||||
if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
|
||||
if (dumpThisFrame_) {
|
||||
NOTICE_LOG(G3D, "================ FLUSH ================");
|
||||
|
@ -673,12 +679,166 @@ void GLES_GPU::PreExecuteOp(u32 op, u32 diff) {
|
|||
|
||||
void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
|
||||
const u8 cmd = op >> 24;
|
||||
const u8 cmdFlags = commandFlags_[cmd];
|
||||
const CommandInfo info = cmdInfo_[cmd];
|
||||
const u8 cmdFlags = info.flags;
|
||||
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
|
||||
ExecuteOpInternal(op, diff);
|
||||
(this->*info.func)(op, diff);
|
||||
}
|
||||
}
|
||||
|
||||
// Handler for GE_CMD_VADDR: passes the low 24 bits of the op through
// gstate_c.getRelativeAddress() and stores the result as the vertex address
// that Execute_Prim reads. diff is unused.
void GLES_GPU::Execute_Vaddr(u32 op, u32 diff) {
	const u32 data = op & 0x00FFFFFF;
	gstate_c.vertexAddr = gstate_c.getRelativeAddress(data);
}
|
||||
|
||||
// Handler for GE_CMD_IADDR: passes the low 24 bits of the op through
// gstate_c.getRelativeAddress() and stores the result as the index address
// that Execute_Prim reads for indexed draws. diff is unused.
void GLES_GPU::Execute_Iaddr(u32 op, u32 diff) {
	const u32 data = op & 0x00FFFFFF;
	gstate_c.indexAddr = gstate_c.getRelativeAddress(data);
}
|
||||
|
||||
// Handler for GE_CMD_PRIM, the command that submits geometry for drawing.
//
// op:   low 16 bits hold the vertex count, bits 16-23 the primitive type.
// diff: unused here.
//
// Nothing is drawn when the count is zero, when an anti-aliased line primitive
// is being discarded, when the frame is being skipped, or when the vertex/index
// address is invalid. After a successful submit, the vertex address (non-indexed)
// or index address (indexed) is advanced past the consumed data.
void GLES_GPU::Execute_Prim(u32 op, u32 diff) {
	// This drives all drawing. All other state we just buffer up, then we apply it only
	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.

	const u32 data = op & 0xFFFFFF;
	const u32 count = data & 0xFFFF;
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);

	if (count == 0)
		return;

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.

	if (gstate.isAntiAliasEnabled()) {
		// Discard AA lines in DOA
		if (prim == GE_PRIM_LINE_STRIP)
			return;
		// Discard AA lines in Summon Night 5
		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
			return;
	}

	// This also makes skipping drawing very effective.
	framebufferManager_.SetRenderFrameBuffer();
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// Still account for the cycles the draw would have cost.
		transformDraw_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		int vertexCost = transformDraw_.EstimatePerVertexCost();
		cyclesExecuted += vertexCost * count;
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
	// on platforms where draw calls are expensive like mobile and D3D
	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	void *inds = nullptr;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// Start from zero so vertexAddr is never advanced by an indeterminate value
	// should SubmitPrim return without writing the out-parameter.
	int bytesRead = 0;
	transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);

	int vertexCost = transformDraw_.EstimatePerVertexCost();
	gpuStats.vertexGPUCycles += vertexCost * count;
	cyclesExecuted += vertexCost * count;

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// Q: Are these changes reflected in the real registers? Needs testing.
	if (inds) {
		int indexSize = 1;
		if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
			indexSize = 2;
		gstate_c.indexAddr += count * indexSize;
	} else {
		gstate_c.vertexAddr += bytesRead;
	}
}
|
||||
|
||||
// Handler for GE_CMD_VERTEXTYPE.
//
// Without software skinning, only the UV scale/offset uniform is dirtied, and
// only when the texcoord-format or through-mode bits changed.
// With software skinning, a change limited to the weight-count bits is ignored
// unless the new value has morph bits set; otherwise a manual flush is done.
void GLES_GPU::Execute_VertexType(u32 op, u32 diff) {
	const bool uvBitsChanged = (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) != 0;

	if (!g_Config.bSoftwareSkinning) {
		if (uvBitsChanged)
			shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
		return;
	}

	// Don't flush when weight count changes, unless morph is enabled.
	const bool changedBeyondWeights = (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) != 0;
	const bool morphEnabled = (op & GE_VTYPE_MORPHCOUNT_MASK) != 0;
	if (!changedBeyondWeights && !morphEnabled)
		return;

	// Restore and flush: XOR with diff flips the changed bits back for the
	// duration of the flush, then the second XOR re-applies them.
	gstate.vertType ^= diff;
	Flush();
	gstate.vertType ^= diff;

	if (uvBitsChanged)
		shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
|
||||
|
||||
// Handler shared by GE_CMD_REGION1 and GE_CMD_REGION2: flags the framebuffer
// as changed and marks the texture parameters as needing an update.
// Neither op nor diff is inspected.
void GLES_GPU::Execute_Region(u32 op, u32 diff) {
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
	gstate_c.framebufChanged = true;
}
|
||||
|
||||
// Handler shared by GE_CMD_FRAMEBUFPTR, GE_CMD_FRAMEBUFWIDTH and
// GE_CMD_FRAMEBUFPIXFORMAT: flags the framebuffer as changed and marks the
// texture parameters as needing an update. Neither op nor diff is inspected.
void GLES_GPU::Execute_FramebufType(u32 op, u32 diff) {
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
	gstate_c.framebufChanged = true;
}
|
||||
|
||||
// Handler for GE_CMD_TEXADDR0: overwrites the texture-changed state with
// TEXCHANGE_UPDATED (an assignment, not an OR) and dirties the UV scale/offset
// uniform. Neither op nor diff is inspected.
void GLES_GPU::Execute_TexAddr0(u32 op, u32 diff) {
	gstate_c.textureChanged = TEXCHANGE_UPDATED;

	shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
|
||||
|
||||
// Handler for GE_CMD_LOADCLUT: marks the texture parameters as changed, then
// asks the texture cache to load the CLUT. Neither op nor diff is inspected.
void GLES_GPU::Execute_LoadClut(u32 op, u32 diff) {
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;

	// This could be used to "dirty" textures with clut.
	textureCache_.LoadClut();
}
|
||||
|
||||
// Handler for GE_CMD_TEXSIZE0: refreshes the cached level-0 texture dimensions.
//
// Render to texture may have overridden the width/height, so the cached values
// are left alone unless the size register changed (diff != 0) or the texture
// is already marked as changed.
void GLES_GPU::Execute_TexSize0(u32 op, u32 diff) {
	const bool textureAlreadyChanged = gstate_c.textureChanged != TEXCHANGE_UNCHANGED;
	if (!diff && !textureAlreadyChanged)
		return;

	gstate_c.curTextureWidth = gstate.getTextureWidth(0);
	gstate_c.curTextureHeight = gstate.getTextureHeight(0);
	shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);

	// We will need to reset the texture now.
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
|
||||
|
||||
// Handler shared by GE_CMD_AMBIENTCOLOR and GE_CMD_AMBIENTALPHA: marks the
// ambient uniform dirty. Neither op nor diff is inspected.
void GLES_GPU::Execute_Ambient(u32 op, u32 diff) {
	shaderManager_->DirtyUniform(DIRTY_AMBIENT);
}
|
||||
|
||||
// Handler for GE_CMD_MATERIALDIFFUSE: marks the material-diffuse uniform dirty.
// Neither op nor diff is inspected.
void GLES_GPU::Execute_MaterialDiffuse(u32 op, u32 diff) {
	shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
}
|
||||
|
||||
// Handler for GE_CMD_MATERIALEMISSIVE: marks the material-emissive uniform
// dirty. Neither op nor diff is inspected.
void GLES_GPU::Execute_MaterialEmissive(u32 op, u32 diff) {
	shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
}
|
||||
|
||||
// Handler shared by GE_CMD_MATERIALAMBIENT and GE_CMD_MATERIALALPHA: both feed
// the combined ambient+alpha uniform, which is marked dirty here.
// Neither op nor diff is inspected.
void GLES_GPU::Execute_MaterialAmbient(u32 op, u32 diff) {
	shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
}
|
||||
|
||||
// Handler shared by GE_CMD_MATERIALSPECULAR and GE_CMD_MATERIALSPECULARCOEF:
// marks the material-specular uniform dirty. Neither op nor diff is inspected.
void GLES_GPU::Execute_MaterialSpecular(u32 op, u32 diff) {
	shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
}
|
||||
|
||||
void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
|
@ -689,87 +849,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
|||
break;
|
||||
|
||||
case GE_CMD_VADDR:
|
||||
gstate_c.vertexAddr = gstate_c.getRelativeAddress(data);
|
||||
Execute_Vaddr(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_IADDR:
|
||||
gstate_c.indexAddr = gstate_c.getRelativeAddress(data);
|
||||
Execute_Iaddr(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_PRIM:
|
||||
{
|
||||
// This drives all drawing. All other state we just buffer up, then we apply it only
|
||||
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
|
||||
|
||||
u32 count = data & 0xFFFF;
|
||||
GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);
|
||||
|
||||
if (count == 0)
|
||||
break;
|
||||
|
||||
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
|
||||
|
||||
if (gstate.isAntiAliasEnabled()) {
|
||||
// Discard AA lines in DOA
|
||||
if (prim == GE_PRIM_LINE_STRIP)
|
||||
break;
|
||||
// Discard AA lines in Summon Night 5
|
||||
if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
|
||||
break;
|
||||
}
|
||||
|
||||
// This also make skipping drawing very effective.
|
||||
framebufferManager_.SetRenderFrameBuffer();
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
transformDraw_.SetupVertexDecoder(gstate.vertType);
|
||||
// Rough estimate, not sure what's correct.
|
||||
int vertexCost = transformDraw_.EstimatePerVertexCost();
|
||||
cyclesExecuted += vertexCost * count;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
|
||||
// on platforms where draw calls are expensive like mobile and D3D
|
||||
void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
|
||||
void *inds = 0;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
break;
|
||||
}
|
||||
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
#ifndef MOBILE_DEVICE
|
||||
if (prim > GE_PRIM_RECTANGLES) {
|
||||
ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
|
||||
}
|
||||
#endif
|
||||
|
||||
int bytesRead;
|
||||
transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = transformDraw_.EstimatePerVertexCost();
|
||||
gpuStats.vertexGPUCycles += vertexCost * count;
|
||||
cyclesExecuted += vertexCost * count;
|
||||
|
||||
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
|
||||
// Some games rely on this, they don't bother reloading VADDR and IADDR.
|
||||
// Q: Are these changed reflected in the real registers? Needs testing.
|
||||
if (inds) {
|
||||
int indexSize = 1;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
|
||||
indexSize = 2;
|
||||
gstate_c.indexAddr += count * indexSize;
|
||||
} else {
|
||||
gstate_c.vertexAddr += bytesRead;
|
||||
}
|
||||
}
|
||||
Execute_Prim(op, diff);
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
|
@ -887,26 +975,12 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
|||
break;
|
||||
|
||||
case GE_CMD_VERTEXTYPE:
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
} else {
|
||||
// Don't flush when weight count changes, unless morph is enabled.
|
||||
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
// Restore and flush
|
||||
gstate.vertType ^= diff;
|
||||
Flush();
|
||||
gstate.vertType ^= diff;
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
}
|
||||
}
|
||||
Execute_VertexType(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_REGION1:
|
||||
case GE_CMD_REGION2:
|
||||
gstate_c.framebufChanged = true;
|
||||
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
|
||||
Execute_Region(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_CLIPENABLE:
|
||||
|
@ -979,13 +1053,11 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
|||
case GE_CMD_FRAMEBUFPTR:
|
||||
case GE_CMD_FRAMEBUFWIDTH:
|
||||
case GE_CMD_FRAMEBUFPIXFORMAT:
|
||||
gstate_c.framebufChanged = true;
|
||||
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
|
||||
Execute_FramebufType(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_TEXADDR0:
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
Execute_TexAddr0(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_TEXADDR1:
|
||||
|
@ -1023,9 +1095,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
|||
break;
|
||||
|
||||
case GE_CMD_LOADCLUT:
|
||||
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
|
||||
textureCache_.LoadClut();
|
||||
// This could be used to "dirty" textures with clut.
|
||||
Execute_LoadClut(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_TEXMAPMODE:
|
||||
|
@ -1059,15 +1129,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
case GE_CMD_TEXSIZE0:
|
||||
// Render to texture may have overridden the width/height.
|
||||
// Don't reset it unless the size is different / the texture has changed.
|
||||
if (diff || gstate_c.textureChanged != TEXCHANGE_UNCHANGED) {
|
||||
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
|
||||
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
// We will need to reset the texture now.
|
||||
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
|
||||
}
|
||||
Execute_TexSize0(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_TEXSIZE1:
|
||||
|
@ -1086,25 +1148,25 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
|||
|
||||
case GE_CMD_AMBIENTCOLOR:
|
||||
case GE_CMD_AMBIENTALPHA:
|
||||
shaderManager_->DirtyUniform(DIRTY_AMBIENT);
|
||||
Execute_Ambient(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_MATERIALDIFFUSE:
|
||||
shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
|
||||
Execute_MaterialDiffuse(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_MATERIALEMISSIVE:
|
||||
shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
|
||||
Execute_MaterialEmissive(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_MATERIALAMBIENT:
|
||||
case GE_CMD_MATERIALALPHA:
|
||||
shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
|
||||
Execute_MaterialAmbient(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_MATERIALSPECULAR:
|
||||
case GE_CMD_MATERIALSPECULARCOEF:
|
||||
shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
|
||||
Execute_MaterialSpecular(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_LIGHTTYPE0:
|
||||
|
|
|
@ -74,6 +74,27 @@ public:
|
|||
|
||||
virtual bool DescribeCodePtr(const u8 *ptr, std::string &name);
|
||||
|
||||
typedef void (GLES_GPU::*CmdFunc)(u32 op, u32 diff);
|
||||
// Per-command dispatch record stored in cmdInfo_ and indexed by GE command
// number: the FLAG_* bits checked before execution plus the handler to call.
struct CommandInfo {
|
||||
u8 flags;  // Combination of FLAG_* bits for this command
|
||||
GLES_GPU::CmdFunc func;  // Member function invoked as (this->*func)(op, diff)
|
||||
};
|
||||
|
||||
void Execute_Vaddr(u32 op, u32 diff);
|
||||
void Execute_Iaddr(u32 op, u32 diff);
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_VertexType(u32 op, u32 diff);
|
||||
void Execute_Region(u32 op, u32 diff);
|
||||
void Execute_FramebufType(u32 op, u32 diff);
|
||||
void Execute_TexAddr0(u32 op, u32 diff);
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
void Execute_TexSize0(u32 op, u32 diff);
|
||||
void Execute_Ambient(u32 op, u32 diff);
|
||||
void Execute_MaterialDiffuse(u32 op, u32 diff);
|
||||
void Execute_MaterialEmissive(u32 op, u32 diff);
|
||||
void Execute_MaterialAmbient(u32 op, u32 diff);
|
||||
void Execute_MaterialSpecular(u32 op, u32 diff);
|
||||
|
||||
protected:
|
||||
virtual void FastRunLoop(DisplayList &list);
|
||||
virtual void ProcessEvent(GPUEvent ev);
|
||||
|
@ -92,13 +113,13 @@ private:
|
|||
void CopyDisplayToOutputInternal();
|
||||
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
FramebufferManager framebufferManager_;
|
||||
TextureCache textureCache_;
|
||||
TransformDrawEngine transformDraw_;
|
||||
ShaderManager *shaderManager_;
|
||||
|
||||
u8 *commandFlags_;
|
||||
|
||||
bool resized_;
|
||||
int lastVsync_;
|
||||
|
||||
|
|
|
@ -586,7 +586,7 @@ void GPUCommon::ReapplyGfxStateInternal() {
|
|||
// To be safe we pass 0xFFFFFFFF as the diff.
|
||||
|
||||
for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) {
|
||||
if (i != GE_CMD_ORIGIN) {
|
||||
if (i != GE_CMD_ORIGIN && i != GE_CMD_OFFSETADDR) {
|
||||
ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
|
||||
}
|
||||
}
|
||||
|
@ -683,9 +683,249 @@ void GPUCommon::PreExecuteOp(u32 op, u32 diff) {
|
|||
// Nothing to do
|
||||
}
|
||||
|
||||
// Handler for GE_CMD_OFFSETADDR: stores the op shifted left by 8 as the offset
// address. The shift pushes the command byte (top 8 bits) out of the 32-bit
// value, leaving the 24-bit operand in the upper bits. diff is unused.
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
	const u32 shifted = op << 8;
	gstate_c.offsetAddr = shifted;
}
|
||||
|
||||
// Handler for GE_CMD_ORIGIN: sets the offset address to the current display
// list's PC, read while holding listLock. Neither op nor diff is inspected.
void GPUCommon::Execute_Origin(u32 op, u32 diff) {
	easy_guard guard(listLock);
	const u32 listPC = currentList->pc;
	gstate_c.offsetAddr = listPC;
}
|
||||
|
||||
// Handler for GE_CMD_JUMP: redirects the current display list to the address
// encoded in the low 24 bits of op (resolved via getRelativeAddress).
// An invalid target is logged and the jump is ignored. diff is unused.
void GPUCommon::Execute_Jump(u32 op, u32 diff) {
	easy_guard guard(listLock);

	const u32 data = op & 0x00FFFFFF;
	const u32 target = gstate_c.getRelativeAddress(data);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG_REPORT(G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, data);
		return;
	}

	// pc will be increased after we return, counteract that
	const u32 adjustedPC = target - 4;
	UpdatePC(currentList->pc, adjustedPC);
	currentList->pc = adjustedPC;
}
|
||||
|
||||
// Handler for GE_CMD_BJUMP (bounding box jump): behaves like a JUMP, but is
// taken only when the current list's bboxResult is false. The flag is read
// before listLock is acquired, exactly as in the plain nested form.
// An invalid target is logged and the jump is ignored. diff is unused.
void GPUCommon::Execute_BJump(u32 op, u32 diff) {
	if (currentList->bboxResult)
		return;

	// bounding box jump.
	easy_guard guard(listLock);

	const u32 data = op & 0x00FFFFFF;
	const u32 target = gstate_c.getRelativeAddress(data);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG_REPORT(G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, data);
		return;
	}

	// pc will be increased after we return, counteract that
	const u32 adjustedPC = target - 4;
	UpdatePC(currentList->pc, adjustedPC);
	currentList->pc = adjustedPC;
}
|
||||
|
||||
// Handler for GE_CMD_CALL: pushes a return entry (next PC plus the current
// offset address) on the display list's call stack and redirects the list to
// the target encoded in the low 24 bits of op. diff is unused.
//
// Special case: if the target looks like a 12-word bone matrix upload followed
// by RET, the matrix is loaded directly and no call is made.
//
// A full stack or an invalid target is logged and the call is ignored.
void GPUCommon::Execute_Call(u32 op, u32 diff) {
	easy_guard guard(listLock);

	// Saint Seiya needs correct support for relative calls.
	const u32 retval = currentList->pc + 4;
	const u32 data = op & 0x00FFFFFF;
	const u32 target = gstate_c.getRelativeAddress(data);
	const bool targetValid = Memory::IsValidAddress(target);

	// Bone matrix optimization - many games will CALL a bone matrix (!).
	// The peek uses unchecked reads, so only attempt it when both the first and
	// the last word inspected lie in valid memory; an illegal target must fall
	// through to the error path below instead of being dereferenced.
	if (targetValid && Memory::IsValidAddress(target + 12 * 4) &&
		(Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
		// Check for the end
		if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
			(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET) {
			// Yep, pretty sure this is a bone matrix call.
			FastLoadBoneMatrix(target);
			return;
		}
	}

	if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
		ERROR_LOG_REPORT(G3D, "CALL: Stack full!");
	} else if (!targetValid) {
		ERROR_LOG_REPORT(G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, data);
	} else {
		auto &stackEntry = currentList->stack[currentList->stackptr++];
		stackEntry.pc = retval;
		stackEntry.offsetAddr = gstate_c.offsetAddr;
		// The base address is NOT saved/restored for a regular call.
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4;	// pc will be increased after we return, counteract that
	}
}
|
||||
|
||||
// Handler for GE_CMD_RET: pops the most recent call-stack entry, restores the
// saved offset address and resumes the list at the saved PC. The top nibble of
// the current PC is kept and combined with the low 28 bits of the saved PC.
// An empty stack is logged and ignored; an invalid resulting PC puts the GPU
// into the error state. Neither op nor diff is inspected.
void GPUCommon::Execute_Ret(u32 op, u32 diff) {
	easy_guard guard(listLock);

	if (currentList->stackptr == 0) {
		DEBUG_LOG_REPORT(G3D, "RET: Stack empty!");
		return;
	}

	auto &stackEntry = currentList->stack[--currentList->stackptr];
	gstate_c.offsetAddr = stackEntry.offsetAddr;

	const u32 target = (currentList->pc & 0xF0000000) | (stackEntry.pc & 0x0FFFFFFF);
	// As with jumps, store target - 4 because the run loop adds 4 afterwards.
	const u32 adjustedPC = target - 4;
	UpdatePC(currentList->pc, adjustedPC);
	currentList->pc = adjustedPC;

	if (!Memory::IsValidAddress(currentList->pc)) {
		ERROR_LOG_REPORT(G3D, "Invalid DL PC %08x on return", currentList->pc);
		UpdateState(GPUSTATE_ERROR);
	}
}
|
||||
|
||||
// Handles the GE END command. What END does is decided by the command word
// stored directly before it in the display list:
//  - SIGNAL + END: performs the signal behaviour encoded in the SIGNAL word.
//  - FINISH + END: pauses, syncs or completes the list depending on the
//    signal currently recorded on the list.
//  - anything else: only logged.
//
// op:   the END command word. Its low 16 bits form the second half of the
//       signal argument.
// diff: not used by this handler.
void GPUCommon::Execute_End(u32 op, u32 diff) {
	easy_guard guard(listLock);

	const u32 endPayload = op & 0x00FFFFFF;
	const u32 prevOp = Memory::ReadUnchecked_U32(currentList->pc - 4);
	UpdatePC(currentList->pc);

	const u32 prevCmd = prevOp >> 24;

	if (prevCmd == GE_CMD_SIGNAL) {
		// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
		// SIGNAL word layout as decoded here: bits 16-23 select the behaviour,
		// bits 0-15 carry the signal value.
		SignalBehavior mode = static_cast<SignalBehavior>((prevOp >> 16) & 0xFF);
		int signalId = prevOp & 0xFFFF;
		int endArg = endPayload & 0xFFFF;
		// Cleared by every behaviour that must not raise an interrupt below.
		bool fireInterrupt = true;
		currentList->subIntrToken = signalId;

		switch (mode) {
		case PSP_GE_SIGNAL_HANDLER_SUSPEND:
			// Suspend the list, and call the signal handler. When it's done, resume.
			// Before sdkver 0x02000010, listsync should return paused.
			// NOTE(review): the comparison below is inclusive (<=) - confirm that is intended.
			if (sceKernelGetCompiledSdkVersion() <= 0x02000010) {
				currentList->state = PSP_GE_DL_STATE_PAUSED;
			}
			currentList->signal = mode;
			DEBUG_LOG(G3D, "Signal with wait. signal/end: %04x %04x", signalId, endArg);
			break;

		case PSP_GE_SIGNAL_HANDLER_CONTINUE:
			// Resume the list right away, then call the handler.
			currentList->signal = mode;
			DEBUG_LOG(G3D, "Signal without wait. signal/end: %04x %04x", signalId, endArg);
			break;

		case PSP_GE_SIGNAL_HANDLER_PAUSE:
			// Pause the list instead of ending at the next FINISH.
			// Call the handler with the PAUSE signal value at that FINISH.
			// Technically, this ought to trigger an interrupt, but it won't do anything.
			// But right now, signal is always reset by interrupts, so that causes pause to not work.
			currentList->signal = mode;
			fireInterrupt = false;
			DEBUG_LOG(G3D, "Signal with Pause. signal/end: %04x %04x", signalId, endArg);
			break;

		case PSP_GE_SIGNAL_SYNC:
			// Acts as a memory barrier, never calls any user code.
			// Technically, this ought to trigger an interrupt, but it won't do anything.
			// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
			// However, this is likely a bug in how GE signal interrupts are handled.
			currentList->signal = mode;
			fireInterrupt = false;
			DEBUG_LOG(G3D, "Signal with Sync. signal/end: %04x %04x", signalId, endArg);
			break;

		case PSP_GE_SIGNAL_JUMP: {
			currentList->signal = mode;
			fireInterrupt = false;
			// The address is assembled from the two 16-bit halves.
			// pc will be increased after we return, counteract that.
			u32 dest = ((signalId << 16) | endArg) - 4;
			if (Memory::IsValidAddress(dest)) {
				UpdatePC(currentList->pc, dest);
				currentList->pc = dest;
				DEBUG_LOG(G3D, "Signal with Jump. signal/end: %04x %04x", signalId, endArg);
			} else {
				ERROR_LOG_REPORT(G3D, "Signal with Jump: bad address. signal/end: %04x %04x", signalId, endArg);
			}
			break;
		}

		case PSP_GE_SIGNAL_CALL: {
			currentList->signal = mode;
			fireInterrupt = false;
			// pc will be increased after we return, counteract that.
			u32 dest = ((signalId << 16) | endArg) - 4;
			// The stack-full check comes first, then the address check.
			if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
				ERROR_LOG_REPORT(G3D, "Signal with Call: stack full. signal/end: %04x %04x", signalId, endArg);
			} else if (!Memory::IsValidAddress(dest)) {
				ERROR_LOG_REPORT(G3D, "Signal with Call: bad address. signal/end: %04x %04x", signalId, endArg);
			} else {
				// TODO: This might save/restore other state...
				auto &frame = currentList->stack[currentList->stackptr];
				currentList->stackptr++;
				// This path also stores the base address in the stack entry.
				frame.pc = currentList->pc;
				frame.offsetAddr = gstate_c.offsetAddr;
				frame.baseAddr = gstate.base;
				UpdatePC(currentList->pc, dest);
				currentList->pc = dest;
				DEBUG_LOG(G3D, "Signal with Call. signal/end: %04x %04x", signalId, endArg);
			}
			break;
		}

		case PSP_GE_SIGNAL_RET: {
			currentList->signal = mode;
			fireInterrupt = false;
			if (currentList->stackptr != 0) {
				// TODO: This might save/restore other state...
				currentList->stackptr--;
				auto &frame = currentList->stack[currentList->stackptr];
				gstate_c.offsetAddr = frame.offsetAddr;
				gstate.base = frame.baseAddr;
				// The saved pc is restored as stored, with no "- 4" adjustment here.
				UpdatePC(currentList->pc, frame.pc);
				currentList->pc = frame.pc;
				DEBUG_LOG(G3D, "Signal with Return. signal/end: %04x %04x", signalId, endArg);
			} else {
				ERROR_LOG_REPORT(G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signalId, endArg);
			}
			break;
		}

		default:
			// Unknown behaviour: currentList->signal is left as it was and
			// fireInterrupt stays true.
			ERROR_LOG_REPORT(G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", mode, signalId, endArg);
			break;
		}

		// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
		// Short-circuiting keeps __GeTriggerInterrupt from being called unless
		// interrupts are enabled and the behaviour asked for one.
		if (currentList->interruptsEnabled && fireInterrupt &&
			__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
			currentList->pendingInterrupt = true;
			UpdateState(GPUSTATE_INTERRUPT);
		}
	} else if (prevCmd == GE_CMD_FINISH) {
		const auto recordedSignal = currentList->signal;

		if (recordedSignal == PSP_GE_SIGNAL_HANDLER_PAUSE) {
			// An earlier PAUSE signal turns this FINISH into a pause of the list.
			currentList->state = PSP_GE_DL_STATE_PAUSED;
			if (currentList->interruptsEnabled &&
				__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
				currentList->pendingInterrupt = true;
				UpdateState(GPUSTATE_INTERRUPT);
			}
		} else if (recordedSignal == PSP_GE_SIGNAL_SYNC) {
			currentList->signal = PSP_GE_SIGNAL_NONE;
			// TODO: Technically this should still cause an interrupt. Probably for memory sync.
		} else {
			// Ordinary finish: the low 16 bits of the FINISH word become the sub-interrupt token.
			currentList->subIntrToken = prevOp & 0xFFFF;
			UpdateState(GPUSTATE_DONE);

			const bool interruptQueued = currentList->interruptsEnabled &&
				__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted);
			if (interruptQueued) {
				currentList->pendingInterrupt = true;
			} else {
				// No interrupt was queued, so the list is marked completed
				// right here and the list sync is triggered.
				currentList->state = PSP_GE_DL_STATE_COMPLETED;
				currentList->waitTicks = startingTicks + cyclesExecuted;
				busyTicks = std::max(busyTicks, currentList->waitTicks);
				__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitTicks);
				// Restore the context stored on the list, if it was started with a valid one.
				if (currentList->started && currentList->context.IsValid()) {
					gstate.Restore(currentList->context);
					ReapplyGfxStateInternal();
				}
			}
		}
	} else {
		// END after something that is neither SIGNAL nor FINISH.
		DEBUG_LOG(G3D,"Ah, not finished: %06x", prevOp & 0xFFFFFF);
	}
}
|
||||
|
||||
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
const u32 cmd = op >> 24;
|
||||
|
||||
// Handle control and drawing commands here directly. The others we delegate.
|
||||
switch (cmd) {
|
||||
|
@ -693,94 +933,27 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
|||
break;
|
||||
|
||||
case GE_CMD_OFFSETADDR:
|
||||
gstate_c.offsetAddr = data << 8;
|
||||
Execute_OffsetAddr(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_ORIGIN:
|
||||
{
|
||||
easy_guard guard(listLock);
|
||||
gstate_c.offsetAddr = currentList->pc;
|
||||
}
|
||||
Execute_Origin(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_JUMP:
|
||||
{
|
||||
easy_guard guard(listLock);
|
||||
u32 target = gstate_c.getRelativeAddress(data);
|
||||
if (Memory::IsValidAddress(target)) {
|
||||
UpdatePC(currentList->pc, target - 4);
|
||||
currentList->pc = target - 4; // pc will be increased after we return, counteract that
|
||||
} else {
|
||||
ERROR_LOG_REPORT(G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, data);
|
||||
}
|
||||
}
|
||||
Execute_Jump(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_BJUMP:
|
||||
if (!currentList->bboxResult) {
|
||||
// bounding box jump.
|
||||
easy_guard guard(listLock);
|
||||
u32 target = gstate_c.getRelativeAddress(data);
|
||||
if (Memory::IsValidAddress(target)) {
|
||||
UpdatePC(currentList->pc, target - 4);
|
||||
currentList->pc = target - 4; // pc will be increased after we return, counteract that
|
||||
} else {
|
||||
ERROR_LOG_REPORT(G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, data);
|
||||
}
|
||||
}
|
||||
Execute_BJump(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_CALL:
|
||||
{
|
||||
easy_guard guard(listLock);
|
||||
|
||||
// Saint Seiya needs correct support for relative calls.
|
||||
u32 retval = currentList->pc + 4;
|
||||
u32 target = gstate_c.getRelativeAddress(data);
|
||||
|
||||
// Bone matrix optimization - many games will CALL a bone matrix (!).
|
||||
if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
|
||||
// Check for the end
|
||||
if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
|
||||
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET) {
|
||||
// Yep, pretty sure this is a bone matrix call.
|
||||
FastLoadBoneMatrix(target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
|
||||
ERROR_LOG_REPORT(G3D, "CALL: Stack full!");
|
||||
} else if (!Memory::IsValidAddress(target)) {
|
||||
ERROR_LOG_REPORT(G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, data);
|
||||
} else {
|
||||
auto &stackEntry = currentList->stack[currentList->stackptr++];
|
||||
stackEntry.pc = retval;
|
||||
stackEntry.offsetAddr = gstate_c.offsetAddr;
|
||||
// The base address is NOT saved/restored for a regular call.
|
||||
UpdatePC(currentList->pc, target - 4);
|
||||
currentList->pc = target - 4; // pc will be increased after we return, counteract that
|
||||
}
|
||||
}
|
||||
Execute_Call(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_RET:
|
||||
{
|
||||
easy_guard guard(listLock);
|
||||
if (currentList->stackptr == 0) {
|
||||
DEBUG_LOG_REPORT(G3D, "RET: Stack empty!");
|
||||
} else {
|
||||
auto &stackEntry = currentList->stack[--currentList->stackptr];
|
||||
gstate_c.offsetAddr = stackEntry.offsetAddr;
|
||||
u32 target = (currentList->pc & 0xF0000000) | (stackEntry.pc & 0x0FFFFFFF);
|
||||
UpdatePC(currentList->pc, target - 4);
|
||||
currentList->pc = target - 4;
|
||||
if (!Memory::IsValidAddress(currentList->pc)) {
|
||||
ERROR_LOG_REPORT(G3D, "Invalid DL PC %08x on return", currentList->pc);
|
||||
UpdateState(GPUSTATE_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
Execute_Ret(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_SIGNAL:
|
||||
|
@ -788,160 +961,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
|||
// Processed in GE_END.
|
||||
break;
|
||||
|
||||
case GE_CMD_END: {
|
||||
easy_guard guard(listLock);
|
||||
u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
|
||||
UpdatePC(currentList->pc);
|
||||
switch (prev >> 24) {
|
||||
case GE_CMD_SIGNAL:
|
||||
{
|
||||
// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
|
||||
SignalBehavior behaviour = static_cast<SignalBehavior>((prev >> 16) & 0xFF);
|
||||
int signal = prev & 0xFFFF;
|
||||
int enddata = data & 0xFFFF;
|
||||
bool trigger = true;
|
||||
currentList->subIntrToken = signal;
|
||||
|
||||
switch (behaviour) {
|
||||
case PSP_GE_SIGNAL_HANDLER_SUSPEND:
|
||||
// Suspend the list, and call the signal handler. When it's done, resume.
|
||||
// Before sdkver 0x02000010, listsync should return paused.
|
||||
if (sceKernelGetCompiledSdkVersion() <= 0x02000010)
|
||||
currentList->state = PSP_GE_DL_STATE_PAUSED;
|
||||
currentList->signal = behaviour;
|
||||
DEBUG_LOG(G3D, "Signal with wait. signal/end: %04x %04x", signal, enddata);
|
||||
break;
|
||||
case PSP_GE_SIGNAL_HANDLER_CONTINUE:
|
||||
// Resume the list right away, then call the handler.
|
||||
currentList->signal = behaviour;
|
||||
DEBUG_LOG(G3D, "Signal without wait. signal/end: %04x %04x", signal, enddata);
|
||||
break;
|
||||
case PSP_GE_SIGNAL_HANDLER_PAUSE:
|
||||
// Pause the list instead of ending at the next FINISH.
|
||||
// Call the handler with the PAUSE signal value at that FINISH.
|
||||
// Technically, this ought to trigger an interrupt, but it won't do anything.
|
||||
// But right now, signal is always reset by interrupts, so that causes pause to not work.
|
||||
trigger = false;
|
||||
currentList->signal = behaviour;
|
||||
DEBUG_LOG(G3D, "Signal with Pause. signal/end: %04x %04x", signal, enddata);
|
||||
break;
|
||||
case PSP_GE_SIGNAL_SYNC:
|
||||
// Acts as a memory barrier, never calls any user code.
|
||||
// Technically, this ought to trigger an interrupt, but it won't do anything.
|
||||
// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
|
||||
// However, this is likely a bug in how GE signal interrupts are handled.
|
||||
trigger = false;
|
||||
currentList->signal = behaviour;
|
||||
DEBUG_LOG(G3D, "Signal with Sync. signal/end: %04x %04x", signal, enddata);
|
||||
break;
|
||||
case PSP_GE_SIGNAL_JUMP:
|
||||
{
|
||||
trigger = false;
|
||||
currentList->signal = behaviour;
|
||||
// pc will be increased after we return, counteract that.
|
||||
u32 target = ((signal << 16) | enddata) - 4;
|
||||
if (!Memory::IsValidAddress(target)) {
|
||||
ERROR_LOG_REPORT(G3D, "Signal with Jump: bad address. signal/end: %04x %04x", signal, enddata);
|
||||
} else {
|
||||
UpdatePC(currentList->pc, target);
|
||||
currentList->pc = target;
|
||||
DEBUG_LOG(G3D, "Signal with Jump. signal/end: %04x %04x", signal, enddata);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PSP_GE_SIGNAL_CALL:
|
||||
{
|
||||
trigger = false;
|
||||
currentList->signal = behaviour;
|
||||
// pc will be increased after we return, counteract that.
|
||||
u32 target = ((signal << 16) | enddata) - 4;
|
||||
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
|
||||
ERROR_LOG_REPORT(G3D, "Signal with Call: stack full. signal/end: %04x %04x", signal, enddata);
|
||||
} else if (!Memory::IsValidAddress(target)) {
|
||||
ERROR_LOG_REPORT(G3D, "Signal with Call: bad address. signal/end: %04x %04x", signal, enddata);
|
||||
} else {
|
||||
// TODO: This might save/restore other state...
|
||||
auto &stackEntry = currentList->stack[currentList->stackptr++];
|
||||
stackEntry.pc = currentList->pc;
|
||||
stackEntry.offsetAddr = gstate_c.offsetAddr;
|
||||
stackEntry.baseAddr = gstate.base;
|
||||
UpdatePC(currentList->pc, target);
|
||||
currentList->pc = target;
|
||||
DEBUG_LOG(G3D, "Signal with Call. signal/end: %04x %04x", signal, enddata);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PSP_GE_SIGNAL_RET:
|
||||
{
|
||||
trigger = false;
|
||||
currentList->signal = behaviour;
|
||||
if (currentList->stackptr == 0) {
|
||||
ERROR_LOG_REPORT(G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signal, enddata);
|
||||
} else {
|
||||
// TODO: This might save/restore other state...
|
||||
auto &stackEntry = currentList->stack[--currentList->stackptr];
|
||||
gstate_c.offsetAddr = stackEntry.offsetAddr;
|
||||
gstate.base = stackEntry.baseAddr;
|
||||
UpdatePC(currentList->pc, stackEntry.pc);
|
||||
currentList->pc = stackEntry.pc;
|
||||
DEBUG_LOG(G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ERROR_LOG_REPORT(G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata);
|
||||
break;
|
||||
}
|
||||
// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
|
||||
if (currentList->interruptsEnabled && trigger) {
|
||||
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
|
||||
currentList->pendingInterrupt = true;
|
||||
UpdateState(GPUSTATE_INTERRUPT);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case GE_CMD_FINISH:
|
||||
switch (currentList->signal) {
|
||||
case PSP_GE_SIGNAL_HANDLER_PAUSE:
|
||||
currentList->state = PSP_GE_DL_STATE_PAUSED;
|
||||
if (currentList->interruptsEnabled) {
|
||||
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
|
||||
currentList->pendingInterrupt = true;
|
||||
UpdateState(GPUSTATE_INTERRUPT);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PSP_GE_SIGNAL_SYNC:
|
||||
currentList->signal = PSP_GE_SIGNAL_NONE;
|
||||
// TODO: Technically this should still cause an interrupt. Probably for memory sync.
|
||||
break;
|
||||
|
||||
default:
|
||||
currentList->subIntrToken = prev & 0xFFFF;
|
||||
UpdateState(GPUSTATE_DONE);
|
||||
if (currentList->interruptsEnabled && __GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
|
||||
currentList->pendingInterrupt = true;
|
||||
} else {
|
||||
currentList->state = PSP_GE_DL_STATE_COMPLETED;
|
||||
currentList->waitTicks = startingTicks + cyclesExecuted;
|
||||
busyTicks = std::max(busyTicks, currentList->waitTicks);
|
||||
__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitTicks);
|
||||
if (currentList->started && currentList->context.IsValid()) {
|
||||
gstate.Restore(currentList->context);
|
||||
ReapplyGfxStateInternal();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
DEBUG_LOG(G3D,"Ah, not finished: %06x", prev & 0xFFFFFF);
|
||||
break;
|
||||
}
|
||||
case GE_CMD_END:
|
||||
Execute_End(op, diff);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
DEBUG_LOG(G3D,"DL Unknown: %08x @ %08x", op, currentList == NULL ? 0 : currentList->pc);
|
||||
|
|
|
@ -52,6 +52,14 @@ public:
|
|||
virtual u32 Break(int mode);
|
||||
virtual void ReapplyGfxState();
|
||||
|
||||
void Execute_OffsetAddr(u32 op, u32 diff);
|
||||
void Execute_Origin(u32 op, u32 diff);
|
||||
void Execute_Jump(u32 op, u32 diff);
|
||||
void Execute_BJump(u32 op, u32 diff);
|
||||
void Execute_Call(u32 op, u32 diff);
|
||||
void Execute_Ret(u32 op, u32 diff);
|
||||
void Execute_End(u32 op, u32 diff);
|
||||
|
||||
virtual u64 GetTickEstimate() {
|
||||
#if defined(_M_X64) || defined(ANDROID)
|
||||
return curTickEst_;
|
||||
|
|
Loading…
Add table
Reference in a new issue