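Clean up blit/copy feature detection. These checks don't need the fast GPU feature flags (GPU_SUPPORTS_COPY_IMAGE, GPU_SUPPORTS_FRAMEBUFFER_BLIT, GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH); query the draw context's device caps instead.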

This commit is contained in:
Henrik Rydgård 2022-08-01 23:21:14 +02:00
parent 710c6b6ad1
commit 19931c003f
11 changed files with 52 additions and 39 deletions

View file

@ -283,6 +283,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
caps_.framebufferCopySupported = true;
caps_.framebufferDepthBlitSupported = false;
caps_.framebufferDepthCopySupported = true;
caps_.framebufferSeparateDepthCopySupported = false;
caps_.texture3DSupported = true;
D3D11_FEATURE_DATA_D3D11_OPTIONS options{};

View file

@ -667,6 +667,7 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.framebufferCopySupported = false;
caps_.framebufferDepthBlitSupported = true;
caps_.framebufferDepthCopySupported = false;
caps_.framebufferSeparateDepthCopySupported = false;
caps_.texture3DSupported = true;
if (d3d) {
@ -1241,14 +1242,26 @@ void D3D9Context::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
// Blits the rectangle (srcX1,srcY1)-(srcX2,srcY2) of srcfb onto the rectangle
// (dstX1,dstY1)-(dstX2,dstY2) of dstfb through the device's StretchRect call.
// channelBits selects which surfaces are used: FB_COLOR_BIT uses the color surfaces,
// a value containing FB_DEPTH_BIT uses the depthstencil surfaces.
// Returns true only when StretchRect reports success; returns false for any other
// channelBits value, or on the depth path when either framebuffer is null.
// The tag parameter is not used in this body.
// NOTE(review): this listing appears to interleave the pre-change and post-change lines
// of the hunk (srcSurf/dstSurf are declared twice and there are two StretchRect returns)
// — confirm against the actual file before relying on the statement order shown here.
bool D3D9Context::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) {
D3D9Framebuffer *src = (D3D9Framebuffer *)srcfb;
D3D9Framebuffer *dst = (D3D9Framebuffer *)dstfb;
// NOTE(review): looks like the pre-change guard that rejected everything except color blits.
// If it were live, the FB_DEPTH_BIT branch below could never be reached — confirm it was removed.
if (channelBits != FB_COLOR_BIT)
return false;
LPDIRECT3DSURFACE9 srcSurf;
LPDIRECT3DSURFACE9 dstSurf;
RECT srcRect{ (LONG)srcX1, (LONG)srcY1, (LONG)srcX2, (LONG)srcY2 };
RECT dstRect{ (LONG)dstX1, (LONG)dstY1, (LONG)dstX2, (LONG)dstY2 };
// NOTE(review): second declaration of srcSurf/dstSurf; presumably the pre-change lines
// that the channelBits branch below supersedes — confirm.
LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf;
LPDIRECT3DSURFACE9 dstSurf = dst ? dst->surf : deviceRTsurf;
if (channelBits == FB_COLOR_BIT) {
// Color path: a null framebuffer falls back to deviceRTsurf.
srcSurf = src ? src->surf : deviceRTsurf;
dstSurf = dst ? dst->surf : deviceRTsurf;
} else if (channelBits & FB_DEPTH_BIT) {
// Depth path: there is no fallback surface here, so both framebuffers must be non-null.
if (!src || !dst) {
// Might have implications for non-buffered rendering.
return false;
}
srcSurf = src->depthstencil;
dstSurf = dst->depthstencil;
} else {
// Neither exactly FB_COLOR_BIT nor containing FB_DEPTH_BIT: nothing to blit.
return false;
}
stepId_++;
// NOTE(review): presumably the pre-change return (filter chosen from `filter` alone) — confirm.
return SUCCEEDED(device_->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, filter == FB_BLIT_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT));
// Linear filtering is requested only when the caller asked for it AND this is a pure color blit;
// every other case passes D3DTEXF_POINT.
return SUCCEEDED(device_->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, (filter == FB_BLIT_LINEAR && channelBits == FB_COLOR_BIT) ? D3DTEXF_LINEAR : D3DTEXF_POINT));
}
void D3D9Context::HandleEvent(Event ev, int width, int height, void *param1, void *param2) {

View file

@ -541,7 +541,9 @@ OpenGLContext::OpenGLContext() {
caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
caps_.texture3DSupported = true;
}
caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object;
caps_.framebufferCopySupported = gl_extensions.OES_copy_image || gl_extensions.NV_copy_image || gl_extensions.EXT_copy_image || gl_extensions.ARB_copy_image;
caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object || gl_extensions.GLES3;
caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;
if (gl_extensions.IsGLES) {

View file

@ -578,7 +578,7 @@ static int GetBpp(VkFormat format) {
}
}
VkFormat DataFormatToVulkan(DataFormat format) {
static VkFormat DataFormatToVulkan(DataFormat format) {
switch (format) {
case DataFormat::D16: return VK_FORMAT_D16_UNORM;
case DataFormat::D32F: return VK_FORMAT_D32_SFLOAT;
@ -774,6 +774,8 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
: vulkan_(vulkan), renderManager_(vulkan) {
shaderLanguageDesc_.Init(GLSL_VULKAN);
VkFormat depthStencilFormat = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
caps_.anisoSupported = vulkan->GetDeviceFeatures().enabled.samplerAnisotropy != 0;
caps_.geometryShaderSupported = vulkan->GetDeviceFeatures().enabled.geometryShader != 0;
caps_.tesselationShaderSupported = vulkan->GetDeviceFeatures().enabled.tessellationShader != 0;
@ -784,8 +786,9 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderCullDistance != 0;
caps_.framebufferBlitSupported = true;
caps_.framebufferCopySupported = true;
caps_.framebufferDepthBlitSupported = false; // Can be checked for.
caps_.framebufferDepthBlitSupported = vulkan->GetDeviceInfo().canBlitToPreferredDepthStencilFormat;
caps_.framebufferDepthCopySupported = true; // Will pretty much always be the case.
caps_.framebufferSeparateDepthCopySupported = true; // Will pretty much always be the case.
caps_.preferredDepthBufferFormat = DataFormat::D24_S8; // TODO: Ask vulkan.
caps_.texture3DSupported = true;
@ -1440,18 +1443,24 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const {
VkFormatProperties properties;
vkGetPhysicalDeviceFormatProperties(vulkan_->GetCurrentPhysicalDevice(), vulkan_format, &properties);
uint32_t flags = 0;
if (properties.optimalTilingFeatures & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) {
if (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) {
flags |= FMT_RENDERTARGET;
}
if (properties.optimalTilingFeatures & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
if (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
flags |= FMT_DEPTHSTENCIL;
}
if (properties.optimalTilingFeatures & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) {
if (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) {
flags |= FMT_TEXTURE;
}
if (properties.bufferFeatures & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) {
if (properties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) {
flags |= FMT_INPUTLAYOUT;
}
if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) && (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) {
flags |= FMT_BLIT;
}
if (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) {
flags |= FMT_STORAGE_IMAGE;
}
return flags;
}

View file

@ -202,6 +202,8 @@ enum FormatSupport {
FMT_INPUTLAYOUT = 4,
FMT_DEPTHSTENCIL = 8,
FMT_AUTOGEN_MIPS = 16,
FMT_BLIT = 32,
FMT_STORAGE_IMAGE = 64,
};
enum InfoField {
@ -530,6 +532,7 @@ struct DeviceCaps {
bool framebufferCopySupported;
bool framebufferBlitSupported;
bool framebufferDepthCopySupported;
bool framebufferSeparateDepthCopySupported;
bool framebufferDepthBlitSupported;
bool framebufferFetchSupported;
bool texture3DSupported;

View file

@ -581,15 +581,16 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir
int w = std::min(src->renderWidth, dst->renderWidth);
int h = std::min(src->renderHeight, dst->renderHeight);
// Note: We prefer Blit ahead of Copy here, since at least on GL, Copy will always also copy stencil which we don't want.
// See #9740.
// TODO: This ordering should probably apply to GL only, since in Vulkan you can totally copy just the depth aspect.
if (gstate_c.Supports(GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH)) {
draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
RebindFramebuffer("After BlitFramebufferDepth");
} else if (gstate_c.Supports(GPU_SUPPORTS_COPY_IMAGE)) {
// TODO: It might even be advantageous on some GPUs to do this copy using a fragment shader that writes to Z, that way upcoming commands can just continue that render pass.
// Some GPUs can copy depth but only if stencil gets to come along for the ride. We only want to use this if there is no blit functionality.
if (draw_->GetDeviceCaps().framebufferSeparateDepthCopySupported || !draw_->GetDeviceCaps().framebufferDepthBlitSupported) {
draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::FB_DEPTH_BIT, "BlitFramebufferDepth");
RebindFramebuffer("After BlitFramebufferDepth");
} else if (draw_->GetDeviceCaps().framebufferDepthBlitSupported) {
// We'll accept whether we get a separate depth blit or not...
draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
RebindFramebuffer("After BlitFramebufferDepth");
}
dst->last_frame_depth_updated = gpuStats.numFlips;
}
@ -693,7 +694,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
bool doReinterpret = PSP_CoreParameter().compat.flags().ReinterpretFramebuffers &&
(lang == HLSL_D3D11 || lang == GLSL_VULKAN || lang == GLSL_3xx);
// Copy image required for now.
if (!gstate_c.Supports(GPU_SUPPORTS_COPY_IMAGE))
if (!draw_->GetDeviceCaps().framebufferCopySupported)
doReinterpret = false;
if (!doReinterpret) {
// Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil.

View file

@ -139,7 +139,6 @@ void GPU_D3D11::CheckGPUFeatures() {
}
}
features |= GPU_SUPPORTS_COPY_IMAGE;
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;

View file

@ -185,10 +185,6 @@ void GPU_GLES::CheckGPUFeatures() {
}
}
if (gl_extensions.ARB_framebuffer_object || gl_extensions.NV_framebuffer_blit || gl_extensions.GLES3) {
features |= GPU_SUPPORTS_FRAMEBUFFER_BLIT | GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH;
}
if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD))
features |= GPU_PREFER_REVERSE_COLOR_ORDER;
@ -198,9 +194,6 @@ void GPU_GLES::CheckGPUFeatures() {
if (gl_extensions.EXT_blend_minmax)
features |= GPU_SUPPORTS_BLEND_MINMAX;
if (gl_extensions.OES_copy_image || gl_extensions.NV_copy_image || gl_extensions.EXT_copy_image || gl_extensions.ARB_copy_image)
features |= GPU_SUPPORTS_COPY_IMAGE;
if (!gl_extensions.IsGLES)
features |= GPU_SUPPORTS_LOGIC_OP;

View file

@ -158,7 +158,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, StencilUplo
shaderManager_->DirtyLastShader();
bool useBlit = gstate_c.Supports(GPU_SUPPORTS_FRAMEBUFFER_BLIT);
bool useBlit = draw_->GetDeviceCaps().framebufferDepthBlitSupported;
// Our fragment shader (and discard) is slow. Since the source is 1x, we can stencil to 1x.
// Then after we're done, we'll just blit it across and stretch it there.

View file

@ -479,15 +479,13 @@ enum {
GPU_SUPPORTS_32BIT_INT_FSHADER = FLAG_BIT(15),
GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16),
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
// Free bit: 18
GPU_SUPPORTS_COPY_IMAGE = FLAG_BIT(19),
// Free bits: 18-19
GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
GPU_ROUND_DEPTH_TO_16BIT = FLAG_BIT(23), // Can be disabled either per game or if we use a real 16-bit depth buffer
GPU_SUPPORTS_TEXTURE_LOD_CONTROL = FLAG_BIT(24),
GPU_SUPPORTS_FRAMEBUFFER_BLIT = FLAG_BIT(26),
GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH = FLAG_BIT(27),
// Free bits: 25-27
GPU_SUPPORTS_TEXTURE_NPOT = FLAG_BIT(28),
GPU_SUPPORTS_CLIP_DISTANCE = FLAG_BIT(29),
GPU_SUPPORTS_CULL_DISTANCE = FLAG_BIT(30),

View file

@ -232,19 +232,13 @@ void GPU_Vulkan::CheckGPUFeatures() {
// Mandatory features on Vulkan, which may be checked in "centralized" code
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
features |= GPU_SUPPORTS_FRAMEBUFFER_BLIT;
features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_COPY_IMAGE;
features |= GPU_SUPPORTS_TEXTURE_NPOT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
features |= GPU_SUPPORTS_VERTEX_TEXTURE_FETCH;
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
if (vulkan->GetDeviceInfo().canBlitToPreferredDepthStencilFormat) {
features |= GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH;
}
auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled;
if (enabledFeatures.depthClamp) {
features |= GPU_SUPPORTS_DEPTH_CLAMP;