mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #16434 from unknownbrackets/stencil-opt
Vulkan: Use stencil export when available
This commit is contained in:
commit
d97035fffc
12 changed files with 121 additions and 58 deletions
|
@ -269,6 +269,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
|
|||
caps_.anisoSupported = true;
|
||||
caps_.textureNPOTFullySupported = true;
|
||||
caps_.fragmentShaderDepthWriteSupported = true;
|
||||
caps_.fragmentShaderStencilWriteSupported = false;
|
||||
caps_.blendMinMaxSupported = true;
|
||||
|
||||
D3D11_FEATURE_DATA_D3D11_OPTIONS options{};
|
||||
|
|
|
@ -759,6 +759,7 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
|
|||
caps_.framebufferSeparateDepthCopySupported = false;
|
||||
caps_.texture3DSupported = true;
|
||||
caps_.fragmentShaderDepthWriteSupported = true;
|
||||
caps_.fragmentShaderStencilWriteSupported = false;
|
||||
caps_.blendMinMaxSupported = true;
|
||||
|
||||
if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
|
||||
|
|
|
@ -65,6 +65,7 @@ enum class DataFormat : uint8_t {
|
|||
|
||||
S8,
|
||||
D16,
|
||||
D16_S8,
|
||||
D24_S8,
|
||||
D32F,
|
||||
D32F_S8,
|
||||
|
|
|
@ -371,6 +371,7 @@ void CheckGLExtensions() {
|
|||
gl_extensions.ARB_uniform_buffer_object = g_set_gl_extensions.count("GL_ARB_uniform_buffer_object") != 0;
|
||||
gl_extensions.ARB_explicit_attrib_location = g_set_gl_extensions.count("GL_ARB_explicit_attrib_location") != 0;
|
||||
gl_extensions.ARB_texture_non_power_of_two = g_set_gl_extensions.count("GL_ARB_texture_non_power_of_two") != 0;
|
||||
gl_extensions.ARB_shader_stencil_export = g_set_gl_extensions.count("GL_ARB_shader_stencil_export") != 0;
|
||||
if (gl_extensions.IsGLES) {
|
||||
gl_extensions.EXT_blend_func_extended = g_set_gl_extensions.count("GL_EXT_blend_func_extended") != 0;
|
||||
gl_extensions.OES_texture_npot = g_set_gl_extensions.count("GL_OES_texture_npot") != 0;
|
||||
|
|
|
@ -72,6 +72,7 @@ struct GLExtensions {
|
|||
bool ARB_uniform_buffer_object;
|
||||
bool ARB_texture_non_power_of_two;
|
||||
bool ARB_stencil_texturing;
|
||||
bool ARB_shader_stencil_export;
|
||||
|
||||
// EXT
|
||||
bool EXT_swap_control_tear;
|
||||
|
|
|
@ -575,6 +575,7 @@ OpenGLContext::OpenGLContext() {
|
|||
} else {
|
||||
caps_.fragmentShaderDepthWriteSupported = true;
|
||||
}
|
||||
caps_.fragmentShaderStencilWriteSupported = gl_extensions.ARB_shader_stencil_export;
|
||||
|
||||
// GLES has no support for logic framebuffer operations. No such extension even seems to exist.
|
||||
caps_.logicOpSupported = !gl_extensions.IsGLES;
|
||||
|
|
|
@ -592,6 +592,8 @@ static int GetBpp(VkFormat format) {
|
|||
static VkFormat DataFormatToVulkan(DataFormat format) {
|
||||
switch (format) {
|
||||
case DataFormat::D16: return VK_FORMAT_D16_UNORM;
|
||||
case DataFormat::D16_S8: return VK_FORMAT_D16_UNORM_S8_UINT;
|
||||
case DataFormat::D24_S8: return VK_FORMAT_D24_UNORM_S8_UINT;
|
||||
case DataFormat::D32F: return VK_FORMAT_D32_SFLOAT;
|
||||
case DataFormat::D32F_S8: return VK_FORMAT_D32_SFLOAT_S8_UINT;
|
||||
case DataFormat::S8: return VK_FORMAT_S8_UINT;
|
||||
|
@ -784,6 +786,25 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur
|
|||
return true;
|
||||
}
|
||||
|
||||
// Translates a Vulkan depth/stencil VkFormat into the corresponding DataFormat.
// Any format not listed below yields DataFormat::UNDEFINED.
static DataFormat DataFormatFromVulkanDepth(VkFormat fmt) {
	DataFormat result = DataFormat::UNDEFINED;

	switch (fmt) {
	// Depth-only formats.
	case VK_FORMAT_D16_UNORM:
		result = DataFormat::D16;
		break;
	case VK_FORMAT_D32_SFLOAT:
		result = DataFormat::D32F;
		break;

	// Combined depth + stencil formats.
	case VK_FORMAT_D16_UNORM_S8_UINT:
		result = DataFormat::D16_S8;
		break;
	case VK_FORMAT_D24_UNORM_S8_UINT:
		result = DataFormat::D24_S8;
		break;
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		result = DataFormat::D32F_S8;
		break;

	default:
		// Leave result as UNDEFINED.
		break;
	}

	return result;
}
|
||||
|
||||
VKContext::VKContext(VulkanContext *vulkan)
|
||||
: vulkan_(vulkan), renderManager_(vulkan) {
|
||||
shaderLanguageDesc_.Init(GLSL_VULKAN);
|
||||
|
@ -803,12 +824,14 @@ VKContext::VKContext(VulkanContext *vulkan)
|
|||
caps_.framebufferStencilBlitSupported = caps_.framebufferDepthBlitSupported;
|
||||
caps_.framebufferDepthCopySupported = true; // Will pretty much always be the case.
|
||||
caps_.framebufferSeparateDepthCopySupported = true; // Will pretty much always be the case.
|
||||
caps_.preferredDepthBufferFormat = DataFormat::D24_S8; // TODO: Ask vulkan.
|
||||
// This doesn't affect what depth/stencil format is actually used, see VulkanQueueRunner.
|
||||
caps_.preferredDepthBufferFormat = DataFormatFromVulkanDepth(vulkan->GetDeviceInfo().preferredDepthStencilFormat);
|
||||
caps_.texture3DSupported = true;
|
||||
caps_.textureDepthSupported = true;
|
||||
caps_.fragmentShaderInt32Supported = true;
|
||||
caps_.textureNPOTFullySupported = true;
|
||||
caps_.fragmentShaderDepthWriteSupported = true;
|
||||
caps_.fragmentShaderStencilWriteSupported = vulkan->Extensions().EXT_shader_stencil_export;
|
||||
caps_.blendMinMaxSupported = true;
|
||||
caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.standard.logicOp != 0;
|
||||
caps_.multiViewSupported = vulkan->GetDeviceFeatures().enabled.multiview.multiview != 0;
|
||||
|
|
|
@ -46,6 +46,7 @@ size_t DataFormatSizeInBytes(DataFormat fmt) {
|
|||
|
||||
case DataFormat::S8: return 1;
|
||||
case DataFormat::D16: return 2;
|
||||
case DataFormat::D16_S8: return 3;
|
||||
case DataFormat::D24_S8: return 4;
|
||||
case DataFormat::D32F: return 4;
|
||||
// Or maybe 8...
|
||||
|
@ -68,6 +69,7 @@ const char *DataFormatToString(DataFormat fmt) {
|
|||
|
||||
case DataFormat::S8: return "S8";
|
||||
case DataFormat::D16: return "D16";
|
||||
case DataFormat::D16_S8: return "D16_S8";
|
||||
case DataFormat::D24_S8: return "D24_S8";
|
||||
case DataFormat::D32F: return "D32F";
|
||||
case DataFormat::D32F_S8: return "D32F_S8";
|
||||
|
@ -80,6 +82,7 @@ const char *DataFormatToString(DataFormat fmt) {
|
|||
bool DataFormatIsDepthStencil(DataFormat fmt) {
|
||||
switch (fmt) {
|
||||
case DataFormat::D16:
|
||||
case DataFormat::D16_S8:
|
||||
case DataFormat::D24_S8:
|
||||
case DataFormat::S8:
|
||||
case DataFormat::D32F:
|
||||
|
|
|
@ -572,6 +572,7 @@ struct DeviceCaps {
|
|||
bool fragmentShaderInt32Supported;
|
||||
bool textureNPOTFullySupported;
|
||||
bool fragmentShaderDepthWriteSupported;
|
||||
bool fragmentShaderStencilWriteSupported;
|
||||
bool textureDepthSupported;
|
||||
bool blendMinMaxSupported;
|
||||
bool multiViewSupported;
|
||||
|
|
|
@ -58,6 +58,34 @@ static u8 StencilBits8888(const u8 *ptr8, u32 numPixels) {
|
|||
return bits >> 24;
|
||||
}
|
||||
|
||||
static bool CheckStencilBits(const u8 *src, const VirtualFramebuffer *dstBuffer, int &values, u8 &usedBits) {
|
||||
switch (dstBuffer->fb_format) {
|
||||
case GE_FORMAT_565:
|
||||
// Well, this doesn't make much sense.
|
||||
return false;
|
||||
case GE_FORMAT_5551:
|
||||
usedBits = StencilBits5551(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
|
||||
values = 2;
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
usedBits = StencilBits4444(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
|
||||
values = 16;
|
||||
break;
|
||||
case GE_FORMAT_8888:
|
||||
usedBits = StencilBits8888(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
|
||||
values = 256;
|
||||
break;
|
||||
case GE_FORMAT_INVALID:
|
||||
case GE_FORMAT_DEPTH16:
|
||||
case GE_FORMAT_CLUT8:
|
||||
// Inconceivable.
|
||||
_assert_(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Uniform data for the stencil upload draw: an instance is filled in and handed to
// UpdateDynamicUniformBuffer() before each draw, and the generated fragment shader
// reads stencilValue from it.
struct StencilUB {
|
||||
float stencilValue;
|
||||
};
|
||||
|
@ -83,8 +111,12 @@ static const SamplerDef samplers[1] = {
|
|||
{ 0, "tex" },
|
||||
};
|
||||
|
||||
void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs) {
|
||||
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
|
||||
void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs, bool useExport) {
|
||||
std::vector<const char *> extensions;
|
||||
if (useExport)
|
||||
extensions.push_back("#extension GL_ARB_shader_stencil_export : require");
|
||||
|
||||
ShaderWriter writer(buffer, lang, ShaderStage::Fragment, extensions);
|
||||
writer.HighPrecisionFloat();
|
||||
writer.DeclareSamplers(samplers);
|
||||
|
||||
|
@ -98,9 +130,13 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
|
|||
|
||||
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
|
||||
writer.C(" vec4 outColor = index.aaaa;\n"); // Only care about a.
|
||||
writer.C(" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n");
|
||||
// Bitwise operations on floats, ugh.
|
||||
writer.C(" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n");
|
||||
if (useExport) {
|
||||
writer.C(" gl_FragStencilRefARB = int(roundAndScaleTo255f(index.a));\n");
|
||||
} else {
|
||||
writer.C(" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n");
|
||||
// Bitwise operations on floats, ugh.
|
||||
writer.C(" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n");
|
||||
}
|
||||
|
||||
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
|
||||
writer.C(" gl_FragDepth = gl_FragCoord.z;\n");
|
||||
|
@ -135,10 +171,11 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
|
|||
return false;
|
||||
}
|
||||
|
||||
VirtualFramebuffer *dstBuffer = 0;
|
||||
VirtualFramebuffer *dstBuffer = nullptr;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *vfb = vfbs_[i];
|
||||
if (vfb->fb_address == addr) {
|
||||
// TODO: Maybe we should broadcast to all? Most of the time, there's only one.
|
||||
if (vfb->fb_address == addr && (!dstBuffer || dstBuffer->colorBindSeq < vfb->colorBindSeq)) {
|
||||
dstBuffer = vfb;
|
||||
}
|
||||
}
|
||||
|
@ -148,34 +185,15 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
|
|||
|
||||
int values = 0;
|
||||
u8 usedBits = 0;
|
||||
bool useExportShader = draw_->GetDeviceCaps().fragmentShaderStencilWriteSupported;
|
||||
|
||||
const u8 *src = Memory::GetPointer(addr);
|
||||
if (!src)
|
||||
return false;
|
||||
|
||||
switch (dstBuffer->fb_format) {
|
||||
case GE_FORMAT_565:
|
||||
// Well, this doesn't make much sense.
|
||||
// Could skip this when doing useExportShader, but then we couldn't optimize usedBits == 0.
|
||||
if (!CheckStencilBits(src, dstBuffer, values, usedBits))
|
||||
return false;
|
||||
case GE_FORMAT_5551:
|
||||
usedBits = StencilBits5551(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
|
||||
values = 2;
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
usedBits = StencilBits4444(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
|
||||
values = 16;
|
||||
break;
|
||||
case GE_FORMAT_8888:
|
||||
usedBits = StencilBits8888(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
|
||||
values = 256;
|
||||
break;
|
||||
case GE_FORMAT_INVALID:
|
||||
case GE_FORMAT_DEPTH16:
|
||||
case GE_FORMAT_CLUT8:
|
||||
// Inconceivable.
|
||||
_assert_(false);
|
||||
break;
|
||||
}
|
||||
|
||||
if (usedBits == 0) {
|
||||
if (flags & WriteStencil::STENCIL_IS_ZERO) {
|
||||
|
@ -201,7 +219,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
|
|||
|
||||
char *fsCode = new char[8192];
|
||||
char *vsCode = new char[8192];
|
||||
GenerateStencilFs(fsCode, shaderLanguageDesc, draw_->GetBugs());
|
||||
GenerateStencilFs(fsCode, shaderLanguageDesc, draw_->GetBugs(), useExportShader);
|
||||
GenerateStencilVs(vsCode, shaderLanguageDesc);
|
||||
|
||||
_assert_msg_(strlen(fsCode) < 8192, "StenFS length error: %d", (int)strlen(fsCode));
|
||||
|
@ -303,24 +321,32 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
|
|||
draw_->SetScissorRect(0, 0, w, h);
|
||||
draw_->BindPipeline(stencilWritePipeline_);
|
||||
|
||||
for (int i = 1; i < values; i += i) {
|
||||
if (!(usedBits & i)) {
|
||||
// It's already zero, let's skip it.
|
||||
continue;
|
||||
}
|
||||
if (useExportShader) {
|
||||
// We only need to do one pass if using an export shader.
|
||||
StencilUB ub{};
|
||||
if (dstBuffer->fb_format == GE_FORMAT_4444) {
|
||||
draw_->SetStencilParams(0xFF, (i << 4) | i, 0xFF);
|
||||
ub.stencilValue = i * (16.0f / 255.0f);
|
||||
} else if (dstBuffer->fb_format == GE_FORMAT_5551) {
|
||||
draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
|
||||
ub.stencilValue = i * (128.0f / 255.0f);
|
||||
} else {
|
||||
draw_->SetStencilParams(0xFF, i, 0xFF);
|
||||
ub.stencilValue = i * (1.0f / 255.0f);
|
||||
}
|
||||
draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
|
||||
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
|
||||
draw_->DrawUP(positions, 3);
|
||||
} else {
|
||||
for (int i = 1; i < values; i += i) {
|
||||
if (!(usedBits & i)) {
|
||||
// It's already zero, let's skip it.
|
||||
continue;
|
||||
}
|
||||
StencilUB ub{};
|
||||
if (dstBuffer->fb_format == GE_FORMAT_4444) {
|
||||
draw_->SetStencilParams(0xFF, (i << 4) | i, 0xFF);
|
||||
ub.stencilValue = i * (16.0f / 255.0f);
|
||||
} else if (dstBuffer->fb_format == GE_FORMAT_5551) {
|
||||
draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
|
||||
ub.stencilValue = i * (128.0f / 255.0f);
|
||||
} else {
|
||||
draw_->SetStencilParams(0xFF, i, 0xFF);
|
||||
ub.stencilValue = i * (1.0f / 255.0f);
|
||||
}
|
||||
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
|
||||
draw_->DrawUP(positions, 3);
|
||||
}
|
||||
}
|
||||
|
||||
if (useBlit) {
|
||||
|
|
|
@ -5,5 +5,5 @@
|
|||
#include "Common/GPU/thin3d.h"
|
||||
|
||||
// Exposed for automated tests
|
||||
void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs);
|
||||
void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs, bool useExport);
|
||||
void GenerateStencilVs(char *buffer, const ShaderLanguageDesc &lang);
|
||||
|
|
|
@ -291,17 +291,21 @@ bool TestStencilShaders() {
|
|||
ShaderLanguageDesc desc(languages[k]);
|
||||
std::string errorMessage;
|
||||
|
||||
// Generate all despite failures - it's only 6.
|
||||
GenerateStencilFs(buffer, desc, bugs);
|
||||
if (strlen(buffer) >= 8192) {
|
||||
printf("Stencil fragment shader exceeded buffer:\n\n%s\n", LineNumberString(buffer).c_str());
|
||||
failed = true;
|
||||
}
|
||||
if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) {
|
||||
printf("Error compiling stencil shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str());
|
||||
failed = true;
|
||||
} else {
|
||||
printf("===\n%s\n===\n", buffer);
|
||||
// Generate all despite failures - it's only a few.
|
||||
// Only use export on Vulkan, because GLSL_3xx is ES which doesn't support stencil export.
|
||||
bool allowUseExport = languages[k] == ShaderLanguage::GLSL_VULKAN;
|
||||
for (int useExport = 0; useExport <= (allowUseExport ? 1 : 0); ++useExport) {
|
||||
GenerateStencilFs(buffer, desc, bugs, useExport == 1);
|
||||
if (strlen(buffer) >= 8192) {
|
||||
printf("Stencil fragment shader (useExport=%d) exceeded buffer:\n\n%s\n", useExport, LineNumberString(buffer).c_str());
|
||||
failed = true;
|
||||
}
|
||||
if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) {
|
||||
printf("Error compiling stencil shader (useExport=%d):\n\n%s\n\n%s\n", useExport, LineNumberString(buffer).c_str(), errorMessage.c_str());
|
||||
failed = true;
|
||||
} else {
|
||||
printf("===\n%s\n===\n", buffer);
|
||||
}
|
||||
}
|
||||
|
||||
GenerateStencilVs(buffer, desc);
|
||||
|
|
Loading…
Add table
Reference in a new issue