Merge pull request #5898 from unknownbrackets/gpu-cmd-funcs

Use individual funcs, skipping the ExecuteOpInternal switch
This commit is contained in:
Henrik Rydgård 2014-04-17 10:39:11 +02:00
commit e223ab7df6
4 changed files with 500 additions and 387 deletions

View file

@ -55,13 +55,14 @@ enum {
struct CommandTableEntry {
u8 cmd;
u8 flags;
GLES_GPU::CmdFunc func;
};
static const CommandTableEntry commandTable[] = {
// Changes that dirty the framebuffer
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
{GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE},
@ -83,7 +84,7 @@ static const CommandTableEntry commandTable[] = {
// Changes that dirty the current texture. Really should be possible to avoid executing these if we compile
// by adding some more flags.
{GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE}, // NOTE: only one that uses diff?
{GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GLES_GPU::Execute_TexSize0},
{GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
@ -92,7 +93,7 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_TEXSIZE6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXSIZE7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_TexAddr0},
{GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
@ -203,22 +204,22 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_VIEWPORTZ2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
// Region
{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Region},
{GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Region},
// Scissor
{GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
// These dirty various vertex shader uniforms. Could embed information about that in this table and call dirtyuniform directly, hm...
{GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Ambient},
{GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_Ambient},
{GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialDiffuse},
{GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialEmissive},
{GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialAmbient},
{GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialAmbient},
{GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialSpecular},
{GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_MaterialSpecular},
// These precompute a value. not sure if worth it. Also dirty uniforms, which could be table-ized to avoid execute.
{GE_CMD_LX0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
@ -302,20 +303,20 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_TRANSFERSIZE, 0},
// From Common. No flushing but definitely need execute.
{GE_CMD_OFFSETADDR, FLAG_EXECUTE},
{GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC}, // Really?
{GE_CMD_PRIM, FLAG_EXECUTE},
{GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
{GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
{GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
{GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // Flush?
{GE_CMD_VADDR, FLAG_EXECUTE},
{GE_CMD_IADDR, FLAG_EXECUTE},
{GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // EXECUTE
{GE_CMD_OFFSETADDR, FLAG_EXECUTE, &GPUCommon::Execute_OffsetAddr},
{GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC, &GPUCommon::Execute_Origin}, // Really?
{GE_CMD_PRIM, FLAG_EXECUTE, &GLES_GPU::Execute_Prim},
{GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_Jump},
{GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_Call},
{GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_Ret},
{GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_End}, // Flush?
{GE_CMD_VADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Vaddr},
{GE_CMD_IADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Iaddr},
{GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_BJump}, // EXECUTE
{GE_CMD_BOUNDINGBOX, FLAG_EXECUTE}, // + FLUSHBEFORE when we implement... or not, do we need to?
// Changing the vertex type requires us to flush.
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_VertexType},
{GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
{GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
@ -325,7 +326,7 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_FINISH, FLAG_FLUSHBEFORE},
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE},
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GLES_GPU::Execute_LoadClut},
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC},
// We don't use the dither table.
@ -382,6 +383,8 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_UNKNOWN_FF, FLAG_EXECUTE},
};
GLES_GPU::CommandInfo GLES_GPU::cmdInfo_[256];
GLES_GPU::GLES_GPU()
: resized_(false) {
@ -410,18 +413,21 @@ GLES_GPU::GLES_GPU()
ERROR_LOG(G3D, "gstate has drifted out of sync!");
}
// Sanity check commandFlags table - no dupes please
// Sanity check cmdInfo_ table - no dupes please
std::set<u8> dupeCheck;
commandFlags_ = new u8[256];
memset(commandFlags_, 0, 256 * sizeof(bool));
memset(cmdInfo_, 0, sizeof(cmdInfo_));
for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
u8 cmd = commandTable[i].cmd;
const u8 cmd = commandTable[i].cmd;
if (dupeCheck.find(cmd) != dupeCheck.end()) {
ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
} else {
dupeCheck.insert(cmd);
}
commandFlags_[cmd] |= commandTable[i].flags;
cmdInfo_[cmd].flags |= commandTable[i].flags;
cmdInfo_[cmd].func = commandTable[i].func;
if (!cmdInfo_[cmd].func) {
cmdInfo_[cmd].func = &GLES_GPU::ExecuteOpInternal;
}
}
// Find commands missing from the table.
for (int i = 0; i < 0xEF; i++) {
@ -434,14 +440,14 @@ GLES_GPU::GLES_GPU()
// the tex scale/offset into the vertices anyway.
if (g_Config.bPrescaleUV) {
commandFlags_[GE_CMD_TEXSCALEU] &= ~FLAG_FLUSHBEFOREONCHANGE;
commandFlags_[GE_CMD_TEXSCALEV] &= ~FLAG_FLUSHBEFOREONCHANGE;
commandFlags_[GE_CMD_TEXOFFSETU] &= ~FLAG_FLUSHBEFOREONCHANGE;
commandFlags_[GE_CMD_TEXOFFSETV] &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_TEXSCALEU].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_TEXSCALEV].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_TEXOFFSETU].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_TEXOFFSETV].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
}
if (g_Config.bSoftwareSkinning) {
commandFlags_[GE_CMD_VERTEXTYPE] &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
}
BuildReportingInfo();
@ -451,7 +457,6 @@ GLES_GPU::~GLES_GPU() {
framebufferManager_.DestroyAllFBOs();
shaderManager_->ClearCache(true);
delete shaderManager_;
delete [] commandFlags_;
}
// Let's avoid passing nulls into snprintf().
@ -615,12 +620,13 @@ void GLES_GPU::CopyDisplayToOutputInternal() {
// Maybe should write this in ASM...
void GLES_GPU::FastRunLoop(DisplayList &list) {
const u8 *commandFlags = commandFlags_;
const CommandInfo *cmdInfo = cmdInfo_;
for (; downcount > 0; --downcount) {
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
const u32 op = *(const u32 *)(Memory::base + list.pc);
const u32 cmd = op >> 24;
const u8 cmdFlags = commandFlags[cmd]; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
const CommandInfo info = cmdInfo[cmd];
const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
const u32 diff = op ^ gstate.cmdmem[cmd];
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
@ -628,7 +634,7 @@ void GLES_GPU::FastRunLoop(DisplayList &list) {
}
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
ExecuteOpInternal(op, diff);
(this->*info.func)(op, diff);
}
list.pc += 4;
}
@ -658,7 +664,7 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
}
inline void GLES_GPU::CheckFlushOp(int cmd, u32 diff) {
const u8 cmdFlags = commandFlags_[cmd];
const u8 cmdFlags = cmdInfo_[cmd].flags;
if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
if (dumpThisFrame_) {
NOTICE_LOG(G3D, "================ FLUSH ================");
@ -673,12 +679,166 @@ void GLES_GPU::PreExecuteOp(u32 op, u32 diff) {
void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
const u8 cmd = op >> 24;
const u8 cmdFlags = commandFlags_[cmd];
const CommandInfo info = cmdInfo_[cmd];
const u8 cmdFlags = info.flags;
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
ExecuteOpInternal(op, diff);
(this->*info.func)(op, diff);
}
}
void GLES_GPU::Execute_Vaddr(u32 op, u32 diff) {
gstate_c.vertexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}
void GLES_GPU::Execute_Iaddr(u32 op, u32 diff) {
gstate_c.indexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}
void GLES_GPU::Execute_Prim(u32 op, u32 diff) {
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
u32 data = op & 0xFFFFFF;
u32 count = data & 0xFFFF;
GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);
if (count == 0)
return;
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
if (gstate.isAntiAliasEnabled()) {
// Discard AA lines in DOA
if (prim == GE_PRIM_LINE_STRIP)
return;
// Discard AA lines in Summon Night 5
if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
return;
}
// This also make skipping drawing very effective.
framebufferManager_.SetRenderFrameBuffer();
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
transformDraw_.SetupVertexDecoder(gstate.vertType);
// Rough estimate, not sure what's correct.
int vertexCost = transformDraw_.EstimatePerVertexCost();
cyclesExecuted += vertexCost * count;
return;
}
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
return;
}
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
// on platforms where draw calls are expensive like mobile and D3D
void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
void *inds = 0;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
return;
}
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
#ifndef MOBILE_DEVICE
if (prim > GE_PRIM_RECTANGLES) {
ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
}
#endif
int bytesRead;
transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = transformDraw_.EstimatePerVertexCost();
gpuStats.vertexGPUCycles += vertexCost * count;
cyclesExecuted += vertexCost * count;
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// Q: Are these changed reflected in the real registers? Needs testing.
if (inds) {
int indexSize = 1;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
indexSize = 2;
gstate_c.indexAddr += count * indexSize;
} else {
gstate_c.vertexAddr += bytesRead;
}
}
void GLES_GPU::Execute_VertexType(u32 op, u32 diff) {
if (!g_Config.bSoftwareSkinning) {
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
} else {
// Don't flush when weight count changes, unless morph is enabled.
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
// Restore and flush
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
}
}
void GLES_GPU::Execute_Region(u32 op, u32 diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
void GLES_GPU::Execute_FramebufType(u32 op, u32 diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
void GLES_GPU::Execute_TexAddr0(u32 op, u32 diff) {
gstate_c.textureChanged = TEXCHANGE_UPDATED;
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
void GLES_GPU::Execute_LoadClut(u32 op, u32 diff) {
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
textureCache_.LoadClut();
// This could be used to "dirty" textures with clut.
}
void GLES_GPU::Execute_TexSize0(u32 op, u32 diff) {
// Render to texture may have overridden the width/height.
// Don't reset it unless the size is different / the texture has changed.
if (diff || gstate_c.textureChanged != TEXCHANGE_UNCHANGED) {
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
// We will need to reset the texture now.
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
}
void GLES_GPU::Execute_Ambient(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_AMBIENT);
}
void GLES_GPU::Execute_MaterialDiffuse(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
}
void GLES_GPU::Execute_MaterialEmissive(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
}
void GLES_GPU::Execute_MaterialAmbient(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
}
void GLES_GPU::Execute_MaterialSpecular(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
}
void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
@ -689,87 +849,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_VADDR:
gstate_c.vertexAddr = gstate_c.getRelativeAddress(data);
Execute_Vaddr(op, diff);
break;
case GE_CMD_IADDR:
gstate_c.indexAddr = gstate_c.getRelativeAddress(data);
Execute_Iaddr(op, diff);
break;
case GE_CMD_PRIM:
{
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
u32 count = data & 0xFFFF;
GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);
if (count == 0)
break;
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
if (gstate.isAntiAliasEnabled()) {
// Discard AA lines in DOA
if (prim == GE_PRIM_LINE_STRIP)
break;
// Discard AA lines in Summon Night 5
if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
break;
}
// This also make skipping drawing very effective.
framebufferManager_.SetRenderFrameBuffer();
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
transformDraw_.SetupVertexDecoder(gstate.vertType);
// Rough estimate, not sure what's correct.
int vertexCost = transformDraw_.EstimatePerVertexCost();
cyclesExecuted += vertexCost * count;
return;
}
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
break;
}
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
// on platforms where draw calls are expensive like mobile and D3D
void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
void *inds = 0;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
break;
}
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
#ifndef MOBILE_DEVICE
if (prim > GE_PRIM_RECTANGLES) {
ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
}
#endif
int bytesRead;
transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = transformDraw_.EstimatePerVertexCost();
gpuStats.vertexGPUCycles += vertexCost * count;
cyclesExecuted += vertexCost * count;
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// Q: Are these changed reflected in the real registers? Needs testing.
if (inds) {
int indexSize = 1;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
indexSize = 2;
gstate_c.indexAddr += count * indexSize;
} else {
gstate_c.vertexAddr += bytesRead;
}
}
Execute_Prim(op, diff);
break;
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
@ -887,26 +975,12 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_VERTEXTYPE:
if (!g_Config.bSoftwareSkinning) {
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
} else {
// Don't flush when weight count changes, unless morph is enabled.
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
// Restore and flush
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
}
Execute_VertexType(op, diff);
break;
case GE_CMD_REGION1:
case GE_CMD_REGION2:
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
Execute_Region(op, diff);
break;
case GE_CMD_CLIPENABLE:
@ -979,13 +1053,11 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_FRAMEBUFPTR:
case GE_CMD_FRAMEBUFWIDTH:
case GE_CMD_FRAMEBUFPIXFORMAT:
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
Execute_FramebufType(op, diff);
break;
case GE_CMD_TEXADDR0:
gstate_c.textureChanged = TEXCHANGE_UPDATED;
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
Execute_TexAddr0(op, diff);
break;
case GE_CMD_TEXADDR1:
@ -1023,9 +1095,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_LOADCLUT:
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
textureCache_.LoadClut();
// This could be used to "dirty" textures with clut.
Execute_LoadClut(op, diff);
break;
case GE_CMD_TEXMAPMODE:
@ -1059,15 +1129,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
}
case GE_CMD_TEXSIZE0:
// Render to texture may have overridden the width/height.
// Don't reset it unless the size is different / the texture has changed.
if (diff || gstate_c.textureChanged != TEXCHANGE_UNCHANGED) {
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
// We will need to reset the texture now.
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
Execute_TexSize0(op, diff);
break;
case GE_CMD_TEXSIZE1:
@ -1086,25 +1148,25 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_AMBIENTCOLOR:
case GE_CMD_AMBIENTALPHA:
shaderManager_->DirtyUniform(DIRTY_AMBIENT);
Execute_Ambient(op, diff);
break;
case GE_CMD_MATERIALDIFFUSE:
shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
Execute_MaterialDiffuse(op, diff);
break;
case GE_CMD_MATERIALEMISSIVE:
shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
Execute_MaterialEmissive(op, diff);
break;
case GE_CMD_MATERIALAMBIENT:
case GE_CMD_MATERIALALPHA:
shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
Execute_MaterialAmbient(op, diff);
break;
case GE_CMD_MATERIALSPECULAR:
case GE_CMD_MATERIALSPECULARCOEF:
shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
Execute_MaterialSpecular(op, diff);
break;
case GE_CMD_LIGHTTYPE0:

View file

@ -74,6 +74,27 @@ public:
virtual bool DescribeCodePtr(const u8 *ptr, std::string &name);
typedef void (GLES_GPU::*CmdFunc)(u32 op, u32 diff);
struct CommandInfo {
u8 flags;
GLES_GPU::CmdFunc func;
};
void Execute_Vaddr(u32 op, u32 diff);
void Execute_Iaddr(u32 op, u32 diff);
void Execute_Prim(u32 op, u32 diff);
void Execute_VertexType(u32 op, u32 diff);
void Execute_Region(u32 op, u32 diff);
void Execute_FramebufType(u32 op, u32 diff);
void Execute_TexAddr0(u32 op, u32 diff);
void Execute_LoadClut(u32 op, u32 diff);
void Execute_TexSize0(u32 op, u32 diff);
void Execute_Ambient(u32 op, u32 diff);
void Execute_MaterialDiffuse(u32 op, u32 diff);
void Execute_MaterialEmissive(u32 op, u32 diff);
void Execute_MaterialAmbient(u32 op, u32 diff);
void Execute_MaterialSpecular(u32 op, u32 diff);
protected:
virtual void FastRunLoop(DisplayList &list);
virtual void ProcessEvent(GPUEvent ev);
@ -92,13 +113,13 @@ private:
void CopyDisplayToOutputInternal();
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
static CommandInfo cmdInfo_[256];
FramebufferManager framebufferManager_;
TextureCache textureCache_;
TransformDrawEngine transformDraw_;
ShaderManager *shaderManager_;
u8 *commandFlags_;
bool resized_;
int lastVsync_;

View file

@ -586,7 +586,7 @@ void GPUCommon::ReapplyGfxStateInternal() {
// To be safe we pass 0xFFFFFFFF as the diff.
for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) {
if (i != GE_CMD_ORIGIN) {
if (i != GE_CMD_ORIGIN && i != GE_CMD_OFFSETADDR) {
ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
}
}
@ -683,9 +683,249 @@ void GPUCommon::PreExecuteOp(u32 op, u32 diff) {
// Nothing to do
}
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
gstate_c.offsetAddr = op << 8;
}
void GPUCommon::Execute_Origin(u32 op, u32 diff) {
easy_guard guard(listLock);
gstate_c.offsetAddr = currentList->pc;
}
void GPUCommon::Execute_Jump(u32 op, u32 diff) {
easy_guard guard(listLock);
const u32 data = op & 0x00FFFFFF;
const u32 target = gstate_c.getRelativeAddress(data);
if (Memory::IsValidAddress(target)) {
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
} else {
ERROR_LOG_REPORT(G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, data);
}
}
void GPUCommon::Execute_BJump(u32 op, u32 diff) {
if (!currentList->bboxResult) {
// bounding box jump.
easy_guard guard(listLock);
const u32 data = op & 0x00FFFFFF;
const u32 target = gstate_c.getRelativeAddress(data);
if (Memory::IsValidAddress(target)) {
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
} else {
ERROR_LOG_REPORT(G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, data);
}
}
}
void GPUCommon::Execute_Call(u32 op, u32 diff) {
easy_guard guard(listLock);
// Saint Seiya needs correct support for relative calls.
const u32 retval = currentList->pc + 4;
const u32 data = op & 0x00FFFFFF;
const u32 target = gstate_c.getRelativeAddress(data);
// Bone matrix optimization - many games will CALL a bone matrix (!).
if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
// Check for the end
if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET) {
// Yep, pretty sure this is a bone matrix call.
FastLoadBoneMatrix(target);
return;
}
}
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
ERROR_LOG_REPORT(G3D, "CALL: Stack full!");
} else if (!Memory::IsValidAddress(target)) {
ERROR_LOG_REPORT(G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, data);
} else {
auto &stackEntry = currentList->stack[currentList->stackptr++];
stackEntry.pc = retval;
stackEntry.offsetAddr = gstate_c.offsetAddr;
// The base address is NOT saved/restored for a regular call.
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
}
}
void GPUCommon::Execute_Ret(u32 op, u32 diff) {
easy_guard guard(listLock);
if (currentList->stackptr == 0) {
DEBUG_LOG_REPORT(G3D, "RET: Stack empty!");
} else {
auto &stackEntry = currentList->stack[--currentList->stackptr];
gstate_c.offsetAddr = stackEntry.offsetAddr;
u32 target = (currentList->pc & 0xF0000000) | (stackEntry.pc & 0x0FFFFFFF);
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4;
if (!Memory::IsValidAddress(currentList->pc)) {
ERROR_LOG_REPORT(G3D, "Invalid DL PC %08x on return", currentList->pc);
UpdateState(GPUSTATE_ERROR);
}
}
}
void GPUCommon::Execute_End(u32 op, u32 diff) {
easy_guard guard(listLock);
const u32 data = op & 0x00FFFFFF;
const u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
UpdatePC(currentList->pc);
switch (prev >> 24) {
case GE_CMD_SIGNAL:
{
// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
SignalBehavior behaviour = static_cast<SignalBehavior>((prev >> 16) & 0xFF);
int signal = prev & 0xFFFF;
int enddata = data & 0xFFFF;
bool trigger = true;
currentList->subIntrToken = signal;
switch (behaviour) {
case PSP_GE_SIGNAL_HANDLER_SUSPEND:
// Suspend the list, and call the signal handler. When it's done, resume.
// Before sdkver 0x02000010, listsync should return paused.
if (sceKernelGetCompiledSdkVersion() <= 0x02000010)
currentList->state = PSP_GE_DL_STATE_PAUSED;
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal with wait. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_HANDLER_CONTINUE:
// Resume the list right away, then call the handler.
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal without wait. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_HANDLER_PAUSE:
// Pause the list instead of ending at the next FINISH.
// Call the handler with the PAUSE signal value at that FINISH.
// Technically, this ought to trigger an interrupt, but it won't do anything.
// But right now, signal is always reset by interrupts, so that causes pause to not work.
trigger = false;
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal with Pause. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_SYNC:
// Acts as a memory barrier, never calls any user code.
// Technically, this ought to trigger an interrupt, but it won't do anything.
// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
// However, this is likely a bug in how GE signal interrupts are handled.
trigger = false;
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal with Sync. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_JUMP:
{
trigger = false;
currentList->signal = behaviour;
// pc will be increased after we return, counteract that.
u32 target = ((signal << 16) | enddata) - 4;
if (!Memory::IsValidAddress(target)) {
ERROR_LOG_REPORT(G3D, "Signal with Jump: bad address. signal/end: %04x %04x", signal, enddata);
} else {
UpdatePC(currentList->pc, target);
currentList->pc = target;
DEBUG_LOG(G3D, "Signal with Jump. signal/end: %04x %04x", signal, enddata);
}
}
break;
case PSP_GE_SIGNAL_CALL:
{
trigger = false;
currentList->signal = behaviour;
// pc will be increased after we return, counteract that.
u32 target = ((signal << 16) | enddata) - 4;
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
ERROR_LOG_REPORT(G3D, "Signal with Call: stack full. signal/end: %04x %04x", signal, enddata);
} else if (!Memory::IsValidAddress(target)) {
ERROR_LOG_REPORT(G3D, "Signal with Call: bad address. signal/end: %04x %04x", signal, enddata);
} else {
// TODO: This might save/restore other state...
auto &stackEntry = currentList->stack[currentList->stackptr++];
stackEntry.pc = currentList->pc;
stackEntry.offsetAddr = gstate_c.offsetAddr;
stackEntry.baseAddr = gstate.base;
UpdatePC(currentList->pc, target);
currentList->pc = target;
DEBUG_LOG(G3D, "Signal with Call. signal/end: %04x %04x", signal, enddata);
}
}
break;
case PSP_GE_SIGNAL_RET:
{
trigger = false;
currentList->signal = behaviour;
if (currentList->stackptr == 0) {
ERROR_LOG_REPORT(G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signal, enddata);
} else {
// TODO: This might save/restore other state...
auto &stackEntry = currentList->stack[--currentList->stackptr];
gstate_c.offsetAddr = stackEntry.offsetAddr;
gstate.base = stackEntry.baseAddr;
UpdatePC(currentList->pc, stackEntry.pc);
currentList->pc = stackEntry.pc;
DEBUG_LOG(G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata);
}
}
break;
default:
ERROR_LOG_REPORT(G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata);
break;
}
// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
if (currentList->interruptsEnabled && trigger) {
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->pendingInterrupt = true;
UpdateState(GPUSTATE_INTERRUPT);
}
}
}
break;
case GE_CMD_FINISH:
switch (currentList->signal) {
case PSP_GE_SIGNAL_HANDLER_PAUSE:
currentList->state = PSP_GE_DL_STATE_PAUSED;
if (currentList->interruptsEnabled) {
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->pendingInterrupt = true;
UpdateState(GPUSTATE_INTERRUPT);
}
}
break;
case PSP_GE_SIGNAL_SYNC:
currentList->signal = PSP_GE_SIGNAL_NONE;
// TODO: Technically this should still cause an interrupt. Probably for memory sync.
break;
default:
currentList->subIntrToken = prev & 0xFFFF;
UpdateState(GPUSTATE_DONE);
if (currentList->interruptsEnabled && __GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->pendingInterrupt = true;
} else {
currentList->state = PSP_GE_DL_STATE_COMPLETED;
currentList->waitTicks = startingTicks + cyclesExecuted;
busyTicks = std::max(busyTicks, currentList->waitTicks);
__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitTicks);
if (currentList->started && currentList->context.IsValid()) {
gstate.Restore(currentList->context);
ReapplyGfxStateInternal();
}
}
break;
}
break;
default:
DEBUG_LOG(G3D,"Ah, not finished: %06x", prev & 0xFFFFFF);
break;
}
}
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
const u32 cmd = op >> 24;
// Handle control and drawing commands here directly. The others we delegate.
switch (cmd) {
@ -693,94 +933,27 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
break;
case GE_CMD_OFFSETADDR:
gstate_c.offsetAddr = data << 8;
Execute_OffsetAddr(op, diff);
break;
case GE_CMD_ORIGIN:
{
easy_guard guard(listLock);
gstate_c.offsetAddr = currentList->pc;
}
Execute_Origin(op, diff);
break;
case GE_CMD_JUMP:
{
easy_guard guard(listLock);
u32 target = gstate_c.getRelativeAddress(data);
if (Memory::IsValidAddress(target)) {
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
} else {
ERROR_LOG_REPORT(G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, data);
}
}
Execute_Jump(op, diff);
break;
case GE_CMD_BJUMP:
if (!currentList->bboxResult) {
// bounding box jump.
easy_guard guard(listLock);
u32 target = gstate_c.getRelativeAddress(data);
if (Memory::IsValidAddress(target)) {
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
} else {
ERROR_LOG_REPORT(G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, data);
}
}
Execute_BJump(op, diff);
break;
case GE_CMD_CALL:
{
easy_guard guard(listLock);
// Saint Seiya needs correct support for relative calls.
u32 retval = currentList->pc + 4;
u32 target = gstate_c.getRelativeAddress(data);
// Bone matrix optimization - many games will CALL a bone matrix (!).
if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
// Check for the end
if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET) {
// Yep, pretty sure this is a bone matrix call.
FastLoadBoneMatrix(target);
break;
}
}
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
ERROR_LOG_REPORT(G3D, "CALL: Stack full!");
} else if (!Memory::IsValidAddress(target)) {
ERROR_LOG_REPORT(G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, data);
} else {
auto &stackEntry = currentList->stack[currentList->stackptr++];
stackEntry.pc = retval;
stackEntry.offsetAddr = gstate_c.offsetAddr;
// The base address is NOT saved/restored for a regular call.
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
}
}
Execute_Call(op, diff);
break;
case GE_CMD_RET:
{
easy_guard guard(listLock);
if (currentList->stackptr == 0) {
DEBUG_LOG_REPORT(G3D, "RET: Stack empty!");
} else {
auto &stackEntry = currentList->stack[--currentList->stackptr];
gstate_c.offsetAddr = stackEntry.offsetAddr;
u32 target = (currentList->pc & 0xF0000000) | (stackEntry.pc & 0x0FFFFFFF);
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4;
if (!Memory::IsValidAddress(currentList->pc)) {
ERROR_LOG_REPORT(G3D, "Invalid DL PC %08x on return", currentList->pc);
UpdateState(GPUSTATE_ERROR);
}
}
}
Execute_Ret(op, diff);
break;
case GE_CMD_SIGNAL:
@ -788,160 +961,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
// Processed in GE_END.
break;
case GE_CMD_END: {
easy_guard guard(listLock);
u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
UpdatePC(currentList->pc);
switch (prev >> 24) {
case GE_CMD_SIGNAL:
{
// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
SignalBehavior behaviour = static_cast<SignalBehavior>((prev >> 16) & 0xFF);
int signal = prev & 0xFFFF;
int enddata = data & 0xFFFF;
bool trigger = true;
currentList->subIntrToken = signal;
switch (behaviour) {
case PSP_GE_SIGNAL_HANDLER_SUSPEND:
// Suspend the list, and call the signal handler. When it's done, resume.
// Before sdkver 0x02000010, listsync should return paused.
if (sceKernelGetCompiledSdkVersion() <= 0x02000010)
currentList->state = PSP_GE_DL_STATE_PAUSED;
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal with wait. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_HANDLER_CONTINUE:
// Resume the list right away, then call the handler.
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal without wait. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_HANDLER_PAUSE:
// Pause the list instead of ending at the next FINISH.
// Call the handler with the PAUSE signal value at that FINISH.
// Technically, this ought to trigger an interrupt, but it won't do anything.
// But right now, signal is always reset by interrupts, so that causes pause to not work.
trigger = false;
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal with Pause. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_SYNC:
// Acts as a memory barrier, never calls any user code.
// Technically, this ought to trigger an interrupt, but it won't do anything.
// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
// However, this is likely a bug in how GE signal interrupts are handled.
trigger = false;
currentList->signal = behaviour;
DEBUG_LOG(G3D, "Signal with Sync. signal/end: %04x %04x", signal, enddata);
break;
case PSP_GE_SIGNAL_JUMP:
{
trigger = false;
currentList->signal = behaviour;
// pc will be increased after we return, counteract that.
u32 target = ((signal << 16) | enddata) - 4;
if (!Memory::IsValidAddress(target)) {
ERROR_LOG_REPORT(G3D, "Signal with Jump: bad address. signal/end: %04x %04x", signal, enddata);
} else {
UpdatePC(currentList->pc, target);
currentList->pc = target;
DEBUG_LOG(G3D, "Signal with Jump. signal/end: %04x %04x", signal, enddata);
}
}
break;
case PSP_GE_SIGNAL_CALL:
{
trigger = false;
currentList->signal = behaviour;
// pc will be increased after we return, counteract that.
u32 target = ((signal << 16) | enddata) - 4;
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
ERROR_LOG_REPORT(G3D, "Signal with Call: stack full. signal/end: %04x %04x", signal, enddata);
} else if (!Memory::IsValidAddress(target)) {
ERROR_LOG_REPORT(G3D, "Signal with Call: bad address. signal/end: %04x %04x", signal, enddata);
} else {
// TODO: This might save/restore other state...
auto &stackEntry = currentList->stack[currentList->stackptr++];
stackEntry.pc = currentList->pc;
stackEntry.offsetAddr = gstate_c.offsetAddr;
stackEntry.baseAddr = gstate.base;
UpdatePC(currentList->pc, target);
currentList->pc = target;
DEBUG_LOG(G3D, "Signal with Call. signal/end: %04x %04x", signal, enddata);
}
}
break;
case PSP_GE_SIGNAL_RET:
{
trigger = false;
currentList->signal = behaviour;
if (currentList->stackptr == 0) {
ERROR_LOG_REPORT(G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signal, enddata);
} else {
// TODO: This might save/restore other state...
auto &stackEntry = currentList->stack[--currentList->stackptr];
gstate_c.offsetAddr = stackEntry.offsetAddr;
gstate.base = stackEntry.baseAddr;
UpdatePC(currentList->pc, stackEntry.pc);
currentList->pc = stackEntry.pc;
DEBUG_LOG(G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata);
}
}
break;
default:
ERROR_LOG_REPORT(G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata);
break;
}
// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
if (currentList->interruptsEnabled && trigger) {
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->pendingInterrupt = true;
UpdateState(GPUSTATE_INTERRUPT);
}
}
}
break;
case GE_CMD_FINISH:
switch (currentList->signal) {
case PSP_GE_SIGNAL_HANDLER_PAUSE:
currentList->state = PSP_GE_DL_STATE_PAUSED;
if (currentList->interruptsEnabled) {
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->pendingInterrupt = true;
UpdateState(GPUSTATE_INTERRUPT);
}
}
break;
case PSP_GE_SIGNAL_SYNC:
currentList->signal = PSP_GE_SIGNAL_NONE;
// TODO: Technically this should still cause an interrupt. Probably for memory sync.
break;
default:
currentList->subIntrToken = prev & 0xFFFF;
UpdateState(GPUSTATE_DONE);
if (currentList->interruptsEnabled && __GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->pendingInterrupt = true;
} else {
currentList->state = PSP_GE_DL_STATE_COMPLETED;
currentList->waitTicks = startingTicks + cyclesExecuted;
busyTicks = std::max(busyTicks, currentList->waitTicks);
__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitTicks);
if (currentList->started && currentList->context.IsValid()) {
gstate.Restore(currentList->context);
ReapplyGfxStateInternal();
}
}
break;
}
break;
default:
DEBUG_LOG(G3D,"Ah, not finished: %06x", prev & 0xFFFFFF);
break;
}
case GE_CMD_END:
Execute_End(op, diff);
break;
}
default:
DEBUG_LOG(G3D,"DL Unknown: %08x @ %08x", op, currentList == NULL ? 0 : currentList->pc);

View file

@ -52,6 +52,14 @@ public:
virtual u32 Break(int mode);
virtual void ReapplyGfxState();
void Execute_OffsetAddr(u32 op, u32 diff);
void Execute_Origin(u32 op, u32 diff);
void Execute_Jump(u32 op, u32 diff);
void Execute_BJump(u32 op, u32 diff);
void Execute_Call(u32 op, u32 diff);
void Execute_Ret(u32 op, u32 diff);
void Execute_End(u32 op, u32 diff);
virtual u64 GetTickEstimate() {
#if defined(_M_X64) || defined(ANDROID)
return curTickEst_;