mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #9769 from hrydgard/vulkan-sw-skinning
Implement software skinning for the Vulkan backend.
This commit is contained in:
commit
6169fa289a
7 changed files with 105 additions and 43 deletions
|
@ -88,7 +88,7 @@ protected:
|
|||
// Cached vertex decoders
|
||||
u32 lastVType_ = -1;
|
||||
std::unordered_map<u32, VertexDecoder *> decoderMap_;
|
||||
VertexDecoder *dec_;
|
||||
VertexDecoder *dec_ = nullptr;
|
||||
VertexDecoderJitCache *decJitCache_;
|
||||
VertexDecoderOptions decOptions_;
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
|
|||
textureCache_(0),
|
||||
framebufferManager_(0),
|
||||
numDrawCalls(0),
|
||||
vertexCountInDrawCalls(0),
|
||||
vertexCountInDrawCalls_(0),
|
||||
decodeCounter_(0),
|
||||
dcid_(0) {
|
||||
device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);
|
||||
|
@ -284,7 +284,7 @@ inline void DrawEngineD3D11::SetupVertexDecoderInternal(u32 vertType) {
|
|||
}
|
||||
|
||||
void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX)
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
|
||||
Flush();
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
|
@ -309,17 +309,19 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
|||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
}
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
|
@ -331,7 +333,7 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
|||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls += vertexCount;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep();
|
||||
|
@ -941,12 +943,12 @@ rotateVBO:
|
|||
}
|
||||
|
||||
gpuStats.numDrawCalls += numDrawCalls;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
|
||||
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls = 0;
|
||||
vertexCountInDrawCalls = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
dcid_ = 0;
|
||||
prevPrim_ = GE_PRIM_INVALID;
|
||||
|
@ -959,7 +961,10 @@ rotateVBO:
|
|||
gstate_c.vertBounds.maxU = 0;
|
||||
gstate_c.vertBounds.maxV = 0;
|
||||
|
||||
#if PPSSPP_PLATFORM(WINDOWS) && !PPSSPP_PLATFORM(UWP)
|
||||
// We only support GPU debugging on Windows, and that's the only use case for this.
|
||||
host->GPUNotifyDraw();
|
||||
#endif
|
||||
}
|
||||
|
||||
bool DrawEngineD3D11::IsCodePtrVertexDecoder(const u8 *ptr) const {
|
||||
|
|
|
@ -229,7 +229,7 @@ private:
|
|||
|
||||
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
|
||||
int numDrawCalls;
|
||||
int vertexCountInDrawCalls;
|
||||
int vertexCountInDrawCalls_;
|
||||
|
||||
int decimationCounter_;
|
||||
int decodeCounter_;
|
||||
|
|
|
@ -69,16 +69,12 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
|
|||
draw_(draw),
|
||||
prevPrim_(GE_PRIM_INVALID),
|
||||
numDrawCalls(0),
|
||||
vertexCountInDrawCalls(0),
|
||||
curFrame_(0),
|
||||
nullTexture_(nullptr),
|
||||
stats_{} {
|
||||
|
||||
decOptions_.expandAllWeightsToFloat = false;
|
||||
decOptions_.expand8BitNormalsToFloat = false;
|
||||
|
||||
// Allocate nicely aligned memory. Maybe graphics drivers will
|
||||
// appreciate it.
|
||||
// Allocate nicely aligned memory. Maybe graphics drivers will appreciate it.
|
||||
// All this is a LOT of memory, need to see if we can cut down somehow.
|
||||
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
|
@ -336,7 +332,7 @@ inline void DrawEngineVulkan::SetupVertexDecoderInternal(u32 vertType) {
|
|||
}
|
||||
|
||||
void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX)
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
|
||||
Flush();
|
||||
|
||||
// TODO: Is this the right thing to do?
|
||||
|
@ -360,6 +356,20 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
|||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash ^= (u32)(uintptr_t)verts;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)(uintptr_t)inds;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertType;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)vertexCount;
|
||||
dhash = __rotl(dhash, 13);
|
||||
dhash ^= (u32)prim;
|
||||
dcid_ = dhash;
|
||||
}
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
|
@ -370,7 +380,12 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
|||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls += vertexCount;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
||||
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
|
||||
// Rendertarget == texture?
|
||||
|
@ -452,8 +467,6 @@ void DrawEngineVulkan::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
|
|||
}
|
||||
|
||||
void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
|
||||
int decodedVerts = 0;
|
||||
|
||||
u8 *dest = decoded;
|
||||
|
||||
// Figure out how much pushbuffer space we need to allocate.
|
||||
|
@ -488,9 +501,9 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset,
|
|||
}
|
||||
|
||||
const UVScale origUV = gstate_c.uv;
|
||||
for (int i = 0; i < numDrawCalls; i++) {
|
||||
gstate_c.uv = uvScale[i];
|
||||
DecodeVertsStep(dest, i, decodedVerts); // Note that this can modify i
|
||||
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
|
||||
gstate_c.uv = uvScale[decodeCounter_];
|
||||
DecodeVertsStep(dest, decodeCounter_, decodedVerts_); // NOTE! DecodeVertsStep can modify decodeCounter_!
|
||||
}
|
||||
gstate_c.uv = origUV;
|
||||
|
||||
|
@ -685,9 +698,26 @@ void DrawEngineVulkan::DoFlush() {
|
|||
int vertexCount = 0;
|
||||
bool useElements = true;
|
||||
|
||||
// Decode directly into the pushbuffer
|
||||
// Cannot cache vertex data with morph enabled.
|
||||
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
|
||||
// Also avoid caching when software skinning.
|
||||
VkBuffer vbuf;
|
||||
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
|
||||
if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
|
||||
// If software skinning, we've already predecoded into "decoded". So push that content.
|
||||
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
|
||||
u8 *dest = (u8 *)frame->pushVertex->Push(size, &vbOffset, &vbuf);
|
||||
memcpy(dest, decoded, size);
|
||||
} else {
|
||||
// Decode directly into the pushbuffer
|
||||
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
|
||||
}
|
||||
|
||||
useCache = false;
|
||||
if (useCache) {
|
||||
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
// TODO: Actually support vertex caching
|
||||
}
|
||||
|
||||
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
|
||||
useElements = !indexGen.SeenOnlyPurePrims();
|
||||
vertexCount = indexGen.VertexCount();
|
||||
|
@ -752,7 +782,8 @@ void DrawEngineVulkan::DoFlush() {
|
|||
if (useElements) {
|
||||
VkBuffer ibuf;
|
||||
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, 2 * indexGen.VertexCount(), &ibuf);
|
||||
// TODO: Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments
|
||||
// TODO (maybe): Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments.
|
||||
// Not sure if actually worth it, binding buffers should be fast.
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets);
|
||||
vkCmdBindIndexBuffer(cmd, ibuf, ibOffset, VK_INDEX_TYPE_UINT16);
|
||||
int numInstances = (gstate_c.bezier || gstate_c.spline) ? numPatches : 1;
|
||||
|
@ -888,11 +919,14 @@ void DrawEngineVulkan::DoFlush() {
|
|||
}
|
||||
|
||||
gpuStats.numDrawCalls += numDrawCalls;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls;
|
||||
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
|
||||
|
||||
indexGen.Reset();
|
||||
decodedVerts_ = 0;
|
||||
numDrawCalls = 0;
|
||||
vertexCountInDrawCalls = 0;
|
||||
vertexCountInDrawCalls_ = 0;
|
||||
decodeCounter_ = 0;
|
||||
dcid_ = 0;
|
||||
prevPrim_ = GE_PRIM_INVALID;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
|
|
|
@ -195,6 +195,7 @@ private:
|
|||
|
||||
// Vertex collector state
|
||||
IndexGenerator indexGen;
|
||||
int decodedVerts_ = 0;
|
||||
GEPrimitiveType prevPrim_;
|
||||
|
||||
TransformedVertex *transformed = nullptr;
|
||||
|
@ -220,14 +221,18 @@ private:
|
|||
VkSampler sampler;
|
||||
|
||||
// Null texture
|
||||
VulkanTexture *nullTexture_;
|
||||
VkSampler nullSampler_;
|
||||
VulkanTexture *nullTexture_ = nullptr;
|
||||
VkSampler nullSampler_ = VK_NULL_HANDLE;
|
||||
|
||||
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
|
||||
int numDrawCalls;
|
||||
int vertexCountInDrawCalls;
|
||||
int numDrawCalls = 0;
|
||||
int vertexCountInDrawCalls_ = 0;
|
||||
UVScale uvScale[MAX_DEFERRED_DRAW_CALLS];
|
||||
|
||||
int decimationCounter_ = 0;
|
||||
int decodeCounter_ = 0;
|
||||
u32 dcid_;
|
||||
|
||||
DrawEngineVulkanStats stats_;
|
||||
|
||||
VulkanPipelineRasterStateKey pipelineKey_{};
|
||||
|
|
|
@ -198,12 +198,12 @@ void GPU_Vulkan::CheckGPUFeatures() {
|
|||
|
||||
void GPU_Vulkan::BeginHostFrame() {
|
||||
drawEngine_.BeginFrame();
|
||||
UpdateCmdInfo();
|
||||
|
||||
if (resized_) {
|
||||
CheckGPUFeatures();
|
||||
// In case the GPU changed.
|
||||
BuildReportingInfo();
|
||||
UpdateCmdInfo();
|
||||
framebufferManager_->Resized();
|
||||
drawEngine_.Resized();
|
||||
textureCacheVulkan_->NotifyConfigChanged();
|
||||
|
@ -342,14 +342,13 @@ void GPU_Vulkan::UpdateVsyncInterval(bool force) {
|
|||
}
|
||||
|
||||
void GPU_Vulkan::UpdateCmdInfo() {
|
||||
/*
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_Vulkan::Execute_VertexTypeSkinning;
|
||||
} else {*/
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_Vulkan::Execute_VertexType;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_Vulkan::BeginFrameInternal() {
|
||||
|
@ -539,6 +538,24 @@ void GPU_Vulkan::Execute_VertexType(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
// Vertex-type command handler variant for software skinning.
// `op` is the incoming command word; `diff` holds the bits that differ from the
// previous value (presumably old ^ new - confirm against the command dispatcher).
// A change that touches only the weight-count bits, with no morph bits set in `op`,
// does nothing here: no flush and no state dirtied.
void GPU_Vulkan::Execute_VertexTypeSkinning(u32 op, u32 diff) {
|
||||
// Don't flush when weight count changes, unless morph is enabled.
|
||||
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
// Restore and flush: XOR with diff temporarily undoes the change, Flush() runs,
// then the second XOR re-applies it.
// NOTE(review): presumably so pending draws are flushed with the previous vertex type - confirm.
|
||||
gstate.vertType ^= diff;
|
||||
Flush();
|
||||
gstate.vertType ^= diff;
|
||||
// Texcoord-format or through-mode bits changed: mark UV scale/offset dirty.
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
|
||||
// In this case, we may be doing weights and morphs.
|
||||
// Update any bone matrix uniforms so it uses them correctly.
|
||||
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
// Apply the dirty flags accumulated in deferredVertTypeDirty, then clear them.
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
|
||||
gstate_c.deferredVertTypeDirty = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) {
|
||||
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
|
||||
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
|
||||
|
|
|
@ -77,6 +77,7 @@ public:
|
|||
void Execute_Bezier(u32 op, u32 diff);
|
||||
void Execute_Spline(u32 op, u32 diff);
|
||||
void Execute_VertexType(u32 op, u32 diff);
|
||||
void Execute_VertexTypeSkinning(u32 op, u32 diff);
|
||||
void Execute_TexSize0(u32 op, u32 diff);
|
||||
void Execute_LoadClut(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
|
|
Loading…
Add table
Reference in a new issue