From 0462c01228bc9a1f547ce6d6c97aabf62767e5eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 4 Sep 2019 23:20:57 +0200 Subject: [PATCH 1/4] Workaround ARM Mali depth hardware bug. Fixes #11937 When triangles coincide with the Z=1 plane in specific ways, triggered by Burnout Legends' sky for example, the depth buffer gets corrupted. This is worked around here by slightly rescaling Z. This type of workaround is recommended by ARM driver engineers. Ugly but what can you do when the hardware is bugged. I've done quick tests on a number of games with no issues. --- Common/Vulkan/VulkanContext.h | 3 +++ GPU/GPUState.h | 1 + GPU/Vulkan/DrawEngineVulkan.cpp | 6 +++++- GPU/Vulkan/GPU_Vulkan.cpp | 7 ++++++- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 7 +++++-- ext/native/thin3d/VulkanRenderManager.h | 1 + 6 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Common/Vulkan/VulkanContext.h b/Common/Vulkan/VulkanContext.h index 63fa5108e7..07388ce0f7 100644 --- a/Common/Vulkan/VulkanContext.h +++ b/Common/Vulkan/VulkanContext.h @@ -363,3 +363,6 @@ std::string FormatDriverVersion(const VkPhysicalDeviceProperties &props); // Simple heuristic. bool IsHashMaliDriverVersion(const VkPhysicalDeviceProperties &props); + +// For the ARM Mali depth scale hack. +#define DEPTH_SCALE_HACK_VALUE 0.9999f diff --git a/GPU/GPUState.h b/GPU/GPUState.h index c8481301ba..8afa4f4ef2 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -495,6 +495,7 @@ enum { GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT = FLAG_BIT(26), GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT = FLAG_BIT(27), GPU_SUPPORTS_OES_TEXTURE_NPOT = FLAG_BIT(28), + GPU_NEEDS_DEPTH_SCALE_HACK = FLAG_BIT(29), GPU_PREFER_CPU_DOWNLOAD = FLAG_BIT(30), GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), }; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 82c2656c84..3ce24f2696 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -962,6 +962,10 @@ void DrawEngineVulkan::DoFlush() { // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers. // If non-buffered though, it'll just do a plain clear. + float depth = result.depth; + if (gstate_c.Supports(GPU_NEEDS_DEPTH_SCALE_HACK)) { + depth *= DEPTH_SCALE_HACK_VALUE; + } framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth); int scissorX1 = gstate.getScissorX1(); @@ -970,7 +974,7 @@ void DrawEngineVulkan::DoFlush() { int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if (gstate_c.Supports(GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color); } } diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 5acdc68a43..cdb88d7894 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -205,11 +205,16 @@ void GPU_Vulkan::CheckGPUFeatures() { if (!PSP_CoreParameter().compat.flags().DisableAccurateDepth || driverTooOld) { features |= GPU_SUPPORTS_ACCURATE_DEPTH; } + // These GPUs (up to some certain hardware version?) has a bug where draws that touch the upper range of Z + // depth test incorrectly. This is easily worked around by simply scaling Z down a tiny bit, with a small risk + // for artifacts - haven't seen any though. + features |= GPU_NEEDS_DEPTH_SCALE_HACK; break; } default: - if (!PSP_CoreParameter().compat.flags().DisableAccurateDepth) + if (!PSP_CoreParameter().compat.flags().DisableAccurateDepth) { features |= GPU_SUPPORTS_ACCURATE_DEPTH; + } break; } diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 88382f3b81..ad80589839 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -175,8 +175,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { if (!useHWTransform && doTextureTransform && !isModeThrough) { WRITE(p, "layout (location = %d) in vec3 texcoord;\n", (int)PspAttributeLocation::TEXCOORD); texcoordInVec3 = true; - } - else + } else WRITE(p, "layout (location = %d) in vec2 texcoord;\n", (int)PspAttributeLocation::TEXCOORD); } if (hasColor) { @@ -637,6 +636,10 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { } WRITE(p, " gl_Position = outPos;\n"); + if (gstate_c.Supports(GPU_NEEDS_DEPTH_SCALE_HACK)) { + WRITE(p, " gl_Position.z *= %f;\n", DEPTH_SCALE_HACK_VALUE); + } + WRITE(p, "}\n"); return true; } diff --git a/ext/native/thin3d/VulkanRenderManager.h b/ext/native/thin3d/VulkanRenderManager.h index ee8a15ee56..56a3fa1cf3 100644 --- a/ext/native/thin3d/VulkanRenderManager.h +++ b/ext/native/thin3d/VulkanRenderManager.h @@ -134,6 +134,7 @@ public: // TODO: This should be fixed at the source. data.viewport.vp.maxDepth = clamp_value(vp.maxDepth, 0.0f, 1.0f); data.viewport.vp.minDepth = clamp_value(vp.minDepth, 0.0f, 1.0f); + curRenderStep_->commands.push_back(data); } From f76adfd760403ce1a9390135c4e61c295c6b6e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Sep 2019 00:09:57 +0200 Subject: [PATCH 2/4] Vulkan ARM mali Z hack: Modify the matrix instead of the shader. --- Common/Vulkan/VulkanContext.h | 3 --- GPU/Common/ShaderUniforms.cpp | 5 +++++ GPU/GPUState.h | 3 +++ GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 4 ---- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Common/Vulkan/VulkanContext.h b/Common/Vulkan/VulkanContext.h index 07388ce0f7..63fa5108e7 100644 --- a/Common/Vulkan/VulkanContext.h +++ b/Common/Vulkan/VulkanContext.h @@ -363,6 +363,3 @@ std::string FormatDriverVersion(const VkPhysicalDeviceProperties &props); // Simple heuristic. bool IsHashMaliDriverVersion(const VkPhysicalDeviceProperties &props); - -// For the ARM Mali depth scale hack. -#define DEPTH_SCALE_HACK_VALUE 0.9999f diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 46ad4470e2..ca9320b749 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -151,6 +151,10 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView flippedMatrix = flippedMatrix * g_display_rot_matrix; } + if (gstate_c.Supports(GPU_NEEDS_DEPTH_SCALE_HACK)) { + flippedMatrix[10] *= DEPTH_SCALE_HACK_VALUE; + } + CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr()); } @@ -164,6 +168,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE && g_display_rotation != DisplayRotation::ROTATE_0) { proj_through = proj_through * g_display_rot_matrix; } + proj_through[10] *= 0.999f; CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr()); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 8afa4f4ef2..19901ca437 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -500,6 +500,9 @@ enum { GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), }; +// For the ARM Mali depth scale hack. +#define DEPTH_SCALE_HACK_VALUE 0.9999f + struct KnownVertexBounds { u16 minU; u16 minV; diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index ad80589839..c079285381 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -636,10 +636,6 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { } WRITE(p, " gl_Position = outPos;\n"); - if (gstate_c.Supports(GPU_NEEDS_DEPTH_SCALE_HACK)) { - WRITE(p, " gl_Position.z *= %f;\n", DEPTH_SCALE_HACK_VALUE); - } - WRITE(p, "}\n"); return true; } From 70ec327b40fd61f6edfb57a44c351d067cb511a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Sep 2019 00:33:04 +0200 Subject: [PATCH 3/4] Vulkan: Slim down and rename the Mali hack. --- GPU/Common/ShaderUniforms.cpp | 6 ------ GPU/GPUState.h | 5 +---- GPU/Vulkan/DrawEngineVulkan.cpp | 5 +---- GPU/Vulkan/GPU_Vulkan.cpp | 8 ++++---- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 4 ++++ 5 files changed, 10 insertions(+), 18 deletions(-) diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index ca9320b749..ee90163c7d 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -150,11 +150,6 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE && g_display_rotation != DisplayRotation::ROTATE_0) { flippedMatrix = flippedMatrix * g_display_rot_matrix; } - - if (gstate_c.Supports(GPU_NEEDS_DEPTH_SCALE_HACK)) { - flippedMatrix[10] *= DEPTH_SCALE_HACK_VALUE; - } - CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr()); } @@ -168,7 +163,6 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE && g_display_rotation != DisplayRotation::ROTATE_0) { proj_through = proj_through * g_display_rot_matrix; } - proj_through[10] *= 0.999f; CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr()); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 19901ca437..2f33c3fb91 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -495,14 +495,11 @@ enum { GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT = FLAG_BIT(26), GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT = FLAG_BIT(27), GPU_SUPPORTS_OES_TEXTURE_NPOT = FLAG_BIT(28), - GPU_NEEDS_DEPTH_SCALE_HACK = FLAG_BIT(29), + GPU_NEEDS_Z_EQUAL_W_HACK = FLAG_BIT(29), GPU_PREFER_CPU_DOWNLOAD = FLAG_BIT(30), GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), }; -// For the ARM Mali depth scale hack. -#define DEPTH_SCALE_HACK_VALUE 0.9999f - struct KnownVertexBounds { u16 minU; u16 minV; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 3ce24f2696..ced84746e6 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -962,10 +962,7 @@ void DrawEngineVulkan::DoFlush() { // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers. // If non-buffered though, it'll just do a plain clear. - float depth = result.depth; - if (gstate_c.Supports(GPU_NEEDS_DEPTH_SCALE_HACK)) { - depth *= DEPTH_SCALE_HACK_VALUE; - } + // TODO: Don't we need to rescale result.depth if we use GPU_SCALE_DEPTH_FROM_24_TO_16_BIT and similar? framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth); int scissorX1 = gstate.getScissorX1(); diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index cdb88d7894..a1e6152503 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -205,10 +205,10 @@ void GPU_Vulkan::CheckGPUFeatures() { if (!PSP_CoreParameter().compat.flags().DisableAccurateDepth || driverTooOld) { features |= GPU_SUPPORTS_ACCURATE_DEPTH; } - // These GPUs (up to some certain hardware version?) has a bug where draws that touch the upper range of Z - // depth test incorrectly. This is easily worked around by simply scaling Z down a tiny bit, with a small risk - // for artifacts - haven't seen any though. - features |= GPU_NEEDS_DEPTH_SCALE_HACK; + // These GPUs (up to some certain hardware version?) has a bug where draws where gl_Position.w == .z + // corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case + // is detected. See: https://github.com/hrydgard/ppsspp/issues/11937 + features |= GPU_NEEDS_Z_EQUAL_W_HACK; break; } default: diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index c079285381..265bcbdd0a 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -636,6 +636,10 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { } WRITE(p, " gl_Position = outPos;\n"); + if (gstate_c.Supports(GPU_NEEDS_Z_EQUAL_W_HACK)) { + // See comment in GPU_Vulkan.cpp. + WRITE(p, " if (gl_Position.z == gl_Position.w) gl_Position.z *= 0.999999;\n"); + } WRITE(p, "}\n"); return true; } From 02a96e29bbc6d9c6b2c8077c3ac353b138060410 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Sep 2019 00:52:23 +0200 Subject: [PATCH 4/4] Fix a comment --- GPU/Vulkan/DrawEngineVulkan.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index ced84746e6..af4ee397a5 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -962,7 +962,6 @@ void DrawEngineVulkan::DoFlush() { // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers. // If non-buffered though, it'll just do a plain clear. - // TODO: Don't we need to rescale result.depth if we use GPU_SCALE_DEPTH_FROM_24_TO_16_BIT and similar? framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth); int scissorX1 = gstate.getScissorX1();