Merge pull request #9769 from hrydgard/vulkan-sw-skinning

Implement software skinning for the Vulkan backend.
This commit is contained in:
Henrik Rydgård 2017-06-02 12:15:56 +02:00 committed by GitHub
commit 6169fa289a
7 changed files with 105 additions and 43 deletions

View file

@ -88,7 +88,7 @@ protected:
// Cached vertex decoders
u32 lastVType_ = -1;
std::unordered_map<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_;
VertexDecoderOptions decOptions_;

View file

@ -83,7 +83,7 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
textureCache_(0),
framebufferManager_(0),
numDrawCalls(0),
vertexCountInDrawCalls(0),
vertexCountInDrawCalls_(0),
decodeCounter_(0),
dcid_(0) {
device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);
@ -284,7 +284,7 @@ inline void DrawEngineD3D11::SetupVertexDecoderInternal(u32 vertType) {
}
void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX)
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
Flush();
// TODO: Is this the right thing to do?
@ -309,17 +309,19 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
dc.prim = prim;
dc.vertexCount = vertexCount;
u32 dhash = dcid_;
dhash ^= (u32)(uintptr_t)verts;
dhash = __rotl(dhash, 13);
dhash ^= (u32)(uintptr_t)inds;
dhash = __rotl(dhash, 13);
dhash ^= (u32)vertType;
dhash = __rotl(dhash, 13);
dhash ^= (u32)vertexCount;
dhash = __rotl(dhash, 13);
dhash ^= (u32)prim;
dcid_ = dhash;
if (g_Config.bVertexCache) {
u32 dhash = dcid_;
dhash ^= (u32)(uintptr_t)verts;
dhash = __rotl(dhash, 13);
dhash ^= (u32)(uintptr_t)inds;
dhash = __rotl(dhash, 13);
dhash ^= (u32)vertType;
dhash = __rotl(dhash, 13);
dhash ^= (u32)vertexCount;
dhash = __rotl(dhash, 13);
dhash ^= (u32)prim;
dcid_ = dhash;
}
if (inds) {
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
@ -331,7 +333,7 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
uvScale[numDrawCalls] = gstate_c.uv;
numDrawCalls++;
vertexCountInDrawCalls += vertexCount;
vertexCountInDrawCalls_ += vertexCount;
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
DecodeVertsStep();
@ -941,12 +943,12 @@ rotateVBO:
}
gpuStats.numDrawCalls += numDrawCalls;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls = 0;
vertexCountInDrawCalls = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
dcid_ = 0;
prevPrim_ = GE_PRIM_INVALID;
@ -959,7 +961,10 @@ rotateVBO:
gstate_c.vertBounds.maxU = 0;
gstate_c.vertBounds.maxV = 0;
#if PPSSPP_PLATFORM(WINDOWS) && !PPSSPP_PLATFORM(UWP)
// We only support GPU debugging on Windows, and that's the only use case for this.
host->GPUNotifyDraw();
#endif
}
bool DrawEngineD3D11::IsCodePtrVertexDecoder(const u8 *ptr) const {

View file

@ -229,7 +229,7 @@ private:
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
int vertexCountInDrawCalls;
int vertexCountInDrawCalls_;
int decimationCounter_;
int decodeCounter_;

View file

@ -69,16 +69,12 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
draw_(draw),
prevPrim_(GE_PRIM_INVALID),
numDrawCalls(0),
vertexCountInDrawCalls(0),
curFrame_(0),
nullTexture_(nullptr),
stats_{} {
decOptions_.expandAllWeightsToFloat = false;
decOptions_.expand8BitNormalsToFloat = false;
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// Allocate nicely aligned memory. Maybe graphics drivers will appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
@ -336,7 +332,7 @@ inline void DrawEngineVulkan::SetupVertexDecoderInternal(u32 vertType) {
}
void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX)
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
Flush();
// TODO: Is this the right thing to do?
@ -360,6 +356,20 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
dc.prim = prim;
dc.vertexCount = vertexCount;
if (g_Config.bVertexCache) {
u32 dhash = dcid_;
dhash ^= (u32)(uintptr_t)verts;
dhash = __rotl(dhash, 13);
dhash ^= (u32)(uintptr_t)inds;
dhash = __rotl(dhash, 13);
dhash ^= (u32)vertType;
dhash = __rotl(dhash, 13);
dhash ^= (u32)vertexCount;
dhash = __rotl(dhash, 13);
dhash ^= (u32)prim;
dcid_ = dhash;
}
if (inds) {
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
} else {
@ -370,7 +380,12 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
uvScale[numDrawCalls] = gstate_c.uv;
numDrawCalls++;
vertexCountInDrawCalls += vertexCount;
vertexCountInDrawCalls_ += vertexCount;
if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
decodeCounter_++;
}
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
// Rendertarget == texture?
@ -452,8 +467,6 @@ void DrawEngineVulkan::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
}
void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
int decodedVerts = 0;
u8 *dest = decoded;
// Figure out how much pushbuffer space we need to allocate.
@ -488,9 +501,9 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset,
}
const UVScale origUV = gstate_c.uv;
for (int i = 0; i < numDrawCalls; i++) {
gstate_c.uv = uvScale[i];
DecodeVertsStep(dest, i, decodedVerts); // Note that this can modify i
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep(dest, decodeCounter_, decodedVerts_); // NOTE! DecodeVertsStep can modify i!
}
gstate_c.uv = origUV;
@ -685,9 +698,26 @@ void DrawEngineVulkan::DoFlush() {
int vertexCount = 0;
bool useElements = true;
// Decode directly into the pushbuffer
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
VkBuffer vbuf;
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
// If software skinning, we've already predecoded into "decoded". So push that content.
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
u8 *dest = (u8 *)frame->pushVertex->Push(size, &vbOffset, &vbuf);
memcpy(dest, decoded, size);
} else {
// Decode directly into the pushbuffer
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
}
useCache = false;
if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
// TODO: Actually support vertex caching
}
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims();
vertexCount = indexGen.VertexCount();
@ -752,7 +782,8 @@ void DrawEngineVulkan::DoFlush() {
if (useElements) {
VkBuffer ibuf;
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, 2 * indexGen.VertexCount(), &ibuf);
// TODO: Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments
// TODO (maybe): Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments.
// Not sure if actually worth it, binding buffers should be fast.
vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets);
vkCmdBindIndexBuffer(cmd, ibuf, ibOffset, VK_INDEX_TYPE_UINT16);
int numInstances = (gstate_c.bezier || gstate_c.spline) ? numPatches : 1;
@ -888,11 +919,14 @@ void DrawEngineVulkan::DoFlush() {
}
gpuStats.numDrawCalls += numDrawCalls;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls = 0;
vertexCountInDrawCalls = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
dcid_ = 0;
prevPrim_ = GE_PRIM_INVALID;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

View file

@ -195,6 +195,7 @@ private:
// Vertex collector state
IndexGenerator indexGen;
int decodedVerts_ = 0;
GEPrimitiveType prevPrim_;
TransformedVertex *transformed = nullptr;
@ -220,14 +221,18 @@ private:
VkSampler sampler;
// Null texture
VulkanTexture *nullTexture_;
VkSampler nullSampler_;
VulkanTexture *nullTexture_ = nullptr;
VkSampler nullSampler_ = VK_NULL_HANDLE;
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
int vertexCountInDrawCalls;
int numDrawCalls = 0;
int vertexCountInDrawCalls_ = 0;
UVScale uvScale[MAX_DEFERRED_DRAW_CALLS];
int decimationCounter_ = 0;
int decodeCounter_ = 0;
u32 dcid_;
DrawEngineVulkanStats stats_;
VulkanPipelineRasterStateKey pipelineKey_{};

View file

@ -198,12 +198,12 @@ void GPU_Vulkan::CheckGPUFeatures() {
void GPU_Vulkan::BeginHostFrame() {
drawEngine_.BeginFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
// In case the GPU changed.
BuildReportingInfo();
UpdateCmdInfo();
framebufferManager_->Resized();
drawEngine_.Resized();
textureCacheVulkan_->NotifyConfigChanged();
@ -342,14 +342,13 @@ void GPU_Vulkan::UpdateVsyncInterval(bool force) {
}
void GPU_Vulkan::UpdateCmdInfo() {
/*
if (g_Config.bSoftwareSkinning) {
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_Vulkan::Execute_VertexTypeSkinning;
} else {*/
} else {
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_Vulkan::Execute_VertexType;
// }
}
}
void GPU_Vulkan::BeginFrameInternal() {
@ -539,6 +538,24 @@ void GPU_Vulkan::Execute_VertexType(u32 op, u32 diff) {
}
}
// Vertex type command handler installed by UpdateCmdInfo() when
// g_Config.bSoftwareSkinning is set. A change that only touches the weight
// count bits is let through without a flush, as long as morphing is off.
//
// op:   the command word; only its morph count bits are inspected here.
// diff: XORed into gstate.vertType around the flush - presumably the bits that
//       differ from the previous vertex type (TODO confirm against the caller).
void GPU_Vulkan::Execute_VertexTypeSkinning(u32 op, u32 diff) {
	const bool morphActive = (op & GE_VTYPE_MORPHCOUNT_MASK) != 0;
	const bool changedBeyondWeightCount = (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) != 0;

	// Only the weight count changed and there is no morphing: nothing to do.
	if (!changedBeyondWeightCount && !morphActive) {
		return;
	}

	// Toggle the diff bits so the flush runs against the other vertex type
	// value, then toggle them back once it is done.
	gstate.vertType ^= diff;
	Flush();
	gstate.vertType ^= diff;

	const u32 uvAffectingBits = GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK;
	if (diff & uvAffectingBits) {
		gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
	}

	// Weights and morphs may both be in play here, so apply any deferred
	// dirty flags now to get the bone matrix uniforms updated correctly.
	if (morphActive) {
		gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
		gstate_c.deferredVertTypeDirty = 0;
	}
}
void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) {
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

View file

@ -77,6 +77,7 @@ public:
void Execute_Bezier(u32 op, u32 diff);
void Execute_Spline(u32 op, u32 diff);
void Execute_VertexType(u32 op, u32 diff);
void Execute_VertexTypeSkinning(u32 op, u32 diff);
void Execute_TexSize0(u32 op, u32 diff);
void Execute_LoadClut(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);