mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Fastpath in fastrunloop when diff=0. Remove need for Execute for UV scale/offset.
This commit is contained in:
parent
71baecabd6
commit
2c4e5e2303
7 changed files with 127 additions and 80 deletions
|
@ -314,7 +314,6 @@ public:
|
|||
const VkPhysicalDeviceFeatures &GetFeaturesEnabled() const { return featuresEnabled_; }
|
||||
const VulkanPhysicalDeviceInfo &GetDeviceInfo() const { return deviceInfo_; }
|
||||
|
||||
|
||||
private:
|
||||
VkSemaphore acquireSemaphore;
|
||||
VkSemaphore renderingCompleteSemaphore;
|
||||
|
@ -381,7 +380,6 @@ private:
|
|||
|
||||
VulkanDeleteList deleteList;
|
||||
};
|
||||
|
||||
FrameData frame_[2];
|
||||
int curFrame_;
|
||||
|
||||
|
|
|
@ -389,27 +389,38 @@ void GPU_D3D11::CopyDisplayToOutputInternal() {
|
|||
void GPU_D3D11::FastRunLoop(DisplayList &list) {
|
||||
PROFILE_THIS_SCOPE("gpuloop");
|
||||
const CommandInfo *cmdInfo = cmdInfo_;
|
||||
for (; downcount > 0; --downcount) {
|
||||
int dc = downcount;
|
||||
for (; dc > 0; --dc) {
|
||||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const CommandInfo info = cmdInfo[cmd];
|
||||
const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
|
||||
const CommandInfo &info = cmdInfo[cmd];
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
|
||||
if (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE)) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
|
||||
(this->*info.func)(op, diff);
|
||||
} else if (diff) {
|
||||
uint64_t dirty = info.flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
if (diff == 0) {
|
||||
if (info.flags & FLAG_EXECUTE) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
}
|
||||
} else {
|
||||
uint64_t flags = info.flags;
|
||||
if (flags & FLAG_FLUSHBEFOREONCHANGE) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else {
|
||||
uint64_t dirty = flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
}
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
downcount = 0;
|
||||
}
|
||||
|
||||
void GPU_D3D11::FinishDeferred() {
|
||||
|
@ -533,6 +544,7 @@ void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
|
|||
#endif
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
|
||||
|
||||
int vertexCost = EstimatePerVertexCost() * count;
|
||||
|
@ -598,6 +610,7 @@ void GPU_D3D11::Execute_Bezier(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
|
||||
|
||||
if (gstate_c.bezier)
|
||||
|
@ -668,6 +681,7 @@ void GPU_D3D11::Execute_Spline(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
|
||||
|
||||
if (gstate_c.spline)
|
||||
|
|
|
@ -356,27 +356,38 @@ void GPU_DX9::CopyDisplayToOutputInternal() {
|
|||
void GPU_DX9::FastRunLoop(DisplayList &list) {
|
||||
PROFILE_THIS_SCOPE("gpuloop");
|
||||
const CommandInfo *cmdInfo = cmdInfo_;
|
||||
for (; downcount > 0; --downcount) {
|
||||
int dc = downcount;
|
||||
for (; dc > 0; --dc) {
|
||||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const CommandInfo info = cmdInfo[cmd];
|
||||
const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
|
||||
const CommandInfo &info = cmdInfo[cmd];
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
|
||||
if (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE)) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
|
||||
(this->*info.func)(op, diff);
|
||||
} else if (diff) {
|
||||
uint64_t dirty = info.flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
if (diff == 0) {
|
||||
if (info.flags & FLAG_EXECUTE) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
}
|
||||
} else {
|
||||
uint64_t flags = info.flags;
|
||||
if (flags & FLAG_FLUSHBEFOREONCHANGE) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else {
|
||||
uint64_t dirty = flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
}
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
downcount = 0;
|
||||
}
|
||||
|
||||
void GPU_DX9::FinishDeferred() {
|
||||
|
@ -499,6 +510,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
|
|||
#endif
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
|
||||
|
||||
int vertexCost = EstimatePerVertexCost() * count;
|
||||
|
@ -553,6 +565,7 @@ void GPU_DX9::Execute_Bezier(u32 op, u32 diff) {
|
|||
bool computeNormals = gstate.isLightingEnabled();
|
||||
bool patchFacing = gstate.patchfacing & 1;
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
|
||||
|
||||
// After drawing, we advance pointers - see SubmitPrim which does the same.
|
||||
|
@ -605,6 +618,7 @@ void GPU_DX9::Execute_Spline(u32 op, u32 diff) {
|
|||
bool patchFacing = gstate.patchfacing & 1;
|
||||
u32 vertType = gstate.vertType;
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
|
||||
|
||||
// After drawing, we advance pointers - see SubmitPrim which does the same.
|
||||
|
|
|
@ -567,22 +567,29 @@ void GPU_GLES::FastRunLoop(DisplayList &list) {
|
|||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const CommandInfo info = cmdInfo[cmd];
|
||||
const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
|
||||
const CommandInfo &info = cmdInfo[cmd];
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
|
||||
if (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE)) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else if (diff) {
|
||||
uint64_t dirty = info.flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
if (diff == 0) {
|
||||
if (info.flags & FLAG_EXECUTE) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
}
|
||||
} else {
|
||||
uint64_t flags = info.flags;
|
||||
if (flags & FLAG_FLUSHBEFOREONCHANGE) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else {
|
||||
uint64_t dirty = flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
}
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
|
@ -676,6 +683,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
|
|||
#endif
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = EstimatePerVertexCost();
|
||||
|
@ -772,6 +780,7 @@ void GPU_GLES::Execute_Bezier(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
|
||||
|
||||
if (gstate_c.bezier)
|
||||
|
@ -843,6 +852,7 @@ void GPU_GLES::Execute_Spline(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
|
||||
|
||||
if (gstate_c.spline)
|
||||
|
|
|
@ -111,10 +111,12 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
|||
{ GE_CMD_LOGICOPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
|
||||
|
||||
{ GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
|
||||
{ GE_CMD_TEXSCALEU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleU },
|
||||
{ GE_CMD_TEXSCALEV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleV },
|
||||
{ GE_CMD_TEXOFFSETU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetU },
|
||||
{ GE_CMD_TEXOFFSETV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetV },
|
||||
|
||||
// These are read on every SubmitPrim, no need for dirtying or flushing.
|
||||
{ GE_CMD_TEXSCALEU },
|
||||
{ GE_CMD_TEXSCALEV },
|
||||
{ GE_CMD_TEXOFFSETU },
|
||||
{ GE_CMD_TEXOFFSETV },
|
||||
|
||||
// TEXSIZE0 is handled by each backend.
|
||||
{ GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
|
||||
|
@ -1425,22 +1427,6 @@ void GPUCommon::Execute_End(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_TexScaleU(u32 op, u32 diff) {
|
||||
gstate_c.uv.uScale = getFloat24(op);
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_TexScaleV(u32 op, u32 diff) {
|
||||
gstate_c.uv.vScale = getFloat24(op);
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_TexOffsetU(u32 op, u32 diff) {
|
||||
gstate_c.uv.uOff = getFloat24(op);
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_TexOffsetV(u32 op, u32 diff) {
|
||||
gstate_c.uv.vOff = getFloat24(op);
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_TexLevel(u32 op, u32 diff) {
|
||||
if (diff == 0xFFFFFFFF) return;
|
||||
|
||||
|
|
|
@ -196,6 +196,21 @@ public:
|
|||
GPUgstate GetGState() override;
|
||||
void SetCmdValue(u32 op) override;
|
||||
|
||||
void UpdateUVScaleOffset() {
|
||||
#ifdef _M_SSE
|
||||
__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);
|
||||
_mm_storeu_si128((__m128i *)&gstate_c.uv, values);
|
||||
#elif PPSSPP_PLATFORM(ARM_NEON)
|
||||
const uint32x4_t values = vshlq_n_u32(vld1q_u32(&gstate.texscaleu), 8);
|
||||
vst1q_u32(&gstate_c.uv, values);
|
||||
#else
|
||||
gstate_c.uv.uScale = getFloat24(gstate.texscaleu);
|
||||
gstate_c.uv.vScale = getFloat24(gstate.texscalev);
|
||||
gstate_c.uv.uOff = getFloat24(gstate.texoffsetu);
|
||||
gstate_c.uv.vOff = getFloat24(gstate.texoffsetv);
|
||||
#endif
|
||||
}
|
||||
|
||||
DisplayList* getList(int listid) override {
|
||||
return &dls[listid];
|
||||
}
|
||||
|
|
|
@ -398,22 +398,29 @@ void GPU_Vulkan::FastRunLoop(DisplayList &list) {
|
|||
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
|
||||
const u32 op = *(const u32 *)(Memory::base + list.pc);
|
||||
const u32 cmd = op >> 24;
|
||||
const CommandInfo info = cmdInfo[cmd];
|
||||
const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
|
||||
const CommandInfo &info = cmdInfo[cmd];
|
||||
const u32 diff = op ^ gstate.cmdmem[cmd];
|
||||
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
|
||||
if (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE)) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else if (diff) {
|
||||
uint64_t dirty = info.flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
if (diff == 0) {
|
||||
if (info.flags & FLAG_EXECUTE) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
}
|
||||
} else {
|
||||
uint64_t flags = info.flags;
|
||||
if (flags & FLAG_FLUSHBEFOREONCHANGE) {
|
||||
drawEngine_.Flush();
|
||||
}
|
||||
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
|
||||
if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
|
||||
downcount = dc;
|
||||
(this->*info.func)(op, diff);
|
||||
dc = downcount;
|
||||
} else {
|
||||
uint64_t dirty = flags >> 8;
|
||||
if (dirty)
|
||||
gstate_c.Dirty(dirty);
|
||||
}
|
||||
}
|
||||
list.pc += 4;
|
||||
}
|
||||
|
@ -507,6 +514,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
|
|||
#endif
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = EstimatePerVertexCost() * count;
|
||||
|
@ -603,6 +611,7 @@ void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
|
||||
|
||||
if (gstate_c.bezier)
|
||||
|
@ -674,6 +683,7 @@ void GPU_Vulkan::Execute_Spline(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
|
||||
|
||||
if (gstate_c.spline)
|
||||
|
|
Loading…
Add table
Reference in a new issue