diff --git a/Common/Vulkan/VulkanImage.cpp b/Common/Vulkan/VulkanImage.cpp index 8ad99b774d..85a6e34f1b 100644 --- a/Common/Vulkan/VulkanImage.cpp +++ b/Common/Vulkan/VulkanImage.cpp @@ -121,9 +121,10 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *all if (initialLayout != VK_IMAGE_LAYOUT_UNDEFINED && initialLayout != VK_IMAGE_LAYOUT_PREINITIALIZED) { switch (initialLayout) { case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + case VK_IMAGE_LAYOUT_GENERAL: TransitionImageLayout2(cmd, image_, 0, numMips, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, VK_ACCESS_TRANSFER_WRITE_BIT); + VK_IMAGE_LAYOUT_UNDEFINED, initialLayout, // GENERAL is what the compute upload path asks for; its writes come from a compute shader, not a transfer. + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, initialLayout == VK_IMAGE_LAYOUT_GENERAL ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, initialLayout == VK_IMAGE_LAYOUT_GENERAL ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT); break; default: @@ -208,10 +209,10 @@ void VulkanTexture::GenerateMip(VkCommandBuffer cmd, int mip) { VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); } -void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture) { +void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkImageLayout layout) { TransitionImageLayout2(cmd, image_, 0, numMips_, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_PIPELINE_STAGE_TRANSFER_BIT, vertexTexture ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + layout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // The source scope below must match whoever wrote the mips: compute shader for GENERAL, transfer otherwise. + layout == VK_IMAGE_LAYOUT_GENERAL ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT, vertexTexture ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + layout == VK_IMAGE_LAYOUT_GENERAL ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } @@ -222,6 +223,27 @@ void VulkanTexture::Touch() { } } +VkImageView VulkanTexture::CreateViewForMip(int mip) { + // Create a 2D color view covering only the requested mip level. The caller owns the returned view and must delete it.
+ VkImageViewCreateInfo view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; + view_info.image = image_; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = format_; + view_info.components.r = VK_COMPONENT_SWIZZLE_R; + view_info.components.g = VK_COMPONENT_SWIZZLE_G; + view_info.components.b = VK_COMPONENT_SWIZZLE_B; + view_info.components.a = VK_COMPONENT_SWIZZLE_A; + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view_info.subresourceRange.baseMipLevel = mip; + view_info.subresourceRange.levelCount = 1; + view_info.subresourceRange.baseArrayLayer = 0; + view_info.subresourceRange.layerCount = 1; + VkImageView view = VK_NULL_HANDLE; + VkResult res = vkCreateImageView(vulkan_->GetDevice(), &view_info, nullptr, &view); + assert(res == VK_SUCCESS); + return view; +} + void VulkanTexture::Destroy() { if (view_ != VK_NULL_HANDLE) { vulkan_->Delete().QueueDeleteImageView(view_); diff --git a/Common/Vulkan/VulkanImage.h b/Common/Vulkan/VulkanImage.h index 64f84a8997..4fd06e21c0 100644 --- a/Common/Vulkan/VulkanImage.h +++ b/Common/Vulkan/VulkanImage.h @@ -21,7 +21,11 @@ public: bool CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *allocator, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr); void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels void GenerateMip(VkCommandBuffer cmd, int mip); - void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false); + void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + // When loading mips from compute shaders, you need to pass VK_IMAGE_LAYOUT_GENERAL both as initialLayout to CreateDirect and as layout to EndCreate. + // In addition, ignore UploadMip and GenerateMip, and instead use CreateViewForMip.
Make sure to delete the returned views once you are done with them. + VkImageView CreateViewForMip(int mip); void Destroy(); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 55e400eb9b..ec60b70703 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -67,6 +67,50 @@ static const VkComponentMapping VULKAN_1555_SWIZZLE = { VK_COMPONENT_SWIZZLE_B, static const VkComponentMapping VULKAN_565_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; static const VkComponentMapping VULKAN_8888_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; +static const char *uploadShader = R"( +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// No idea what's optimal here... +#define WORKGROUP_SIZE 16 +layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1) in; + +layout(std430, binding = 0) buffer Buf { + uint pixel[]; +} buf; + +uniform layout(binding = 1, rgba8) writeonly image2D img; + +layout(push_constant) uniform Params { + int width; + int height; +} params; + +void main() { + uint x = gl_GlobalInvocationID.x; + uint y = gl_GlobalInvocationID.y; + // Kill off any out-of-image threads to avoid stray writes. + // Should only happen on the tiniest mipmaps as PSP textures are power-of-2, + // and we use a 16x16 workgroup size. + if (x >= params.width || y >= params.height) + return; + + // Note that if the pixels are packed, we can do multiple stores + // and only launch this compute shader for every N pixels, + // by slicing the width in half and multiplying x by 2, for example. + uint color = buf.pixel[y * params.width + x]; + // Unpack the color (we could look it up in a CLUT here if we wanted...)
+ // It's a bit silly that we need to unpack to float and then have imageStore repack, + // but the alternative is to store to a buffer, and then launch a vkCmdCopyBufferToImage instead. + vec4 outColor = vec4( + (color & 0xFF) * (1.0 / 255.0), + ((color >> 8) & 0xFF) * (1.0 / 255.0), + ((color >> 16) & 0xFF) * (1.0 / 255.0), + ((color >> 24) & 0xFF) * (1.0 / 255.0)); + imageStore(img, ivec2(x,y), outColor); +} +)"; + SamplerCache::~SamplerCache() { DeviceLost(); } @@ -181,6 +225,7 @@ void TextureCacheVulkan::DeviceLost() { if (samplerNearest_) vulkan_->Delete().QueueDeleteSampler(samplerNearest_); + if (uploadCS_ != VK_NULL_HANDLE) vulkan_->Delete().QueueDeleteShaderModule(uploadCS_); upload_.DeviceLost(); nextTexture_ = nullptr; @@ -204,6 +249,10 @@ void TextureCacheVulkan::DeviceRestore(VulkanContext *vulkan, Draw::DrawContext samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; vkCreateSampler(vulkan_->GetDevice(), &samp, nullptr, &samplerNearest_); + std::string error; + uploadCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, uploadShader, &error); + _dbg_assert_msg_(G3D, uploadCS_ != VK_NULL_HANDLE, "failed to compile upload shader"); + upload_.DeviceRestore(vulkan); } @@ -277,10 +326,12 @@ void TextureCacheVulkan::StartFrame() { } allocator_->Begin(); + upload_.BeginFrame(); } void TextureCacheVulkan::EndFrame() { allocator_->End(); + upload_.EndFrame(); if (texelsScaledThisFrame_) { // INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_); @@ -602,6 +653,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { actualFmt = ToVulkanFormat(replaced.Format(0)); } + bool computeUpload = false; + { delete entry->vkTex; entry->vkTex = new VulkanTexture(vulkan_); @@ -626,11 +679,25 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { break; } + VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + // If we want to use the GE debugger, we should
add VK_IMAGE_USAGE_TRANSFER_SRC_BIT too... + + // Compute experiment. Needs the upload shader to have compiled, and is skipped for replaced textures since those are still uploaded with UploadMip, which must not be used on a GENERAL-layout image. NOTE(review): the fakeMipmap path appears to call UploadMip as well - confirm and exclude it here too. + if (actualFmt == VULKAN_8888_FORMAT && uploadCS_ != VK_NULL_HANDLE && !replaced.Valid()) { + computeUpload = true; + } + + if (computeUpload) { + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + char texName[128]{}; snprintf(texName, sizeof(texName), "Texture%08x", entry->addr); image->SetTag(texName); - bool allocSuccess = image->CreateDirect(cmdInit, allocator_, w * scaleFactor, h * scaleFactor, maxLevel + 1, actualFmt, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, mapping); + bool allocSuccess = image->CreateDirect(cmdInit, allocator_, w * scaleFactor, h * scaleFactor, maxLevel + 1, actualFmt, imageLayout, usage, mapping); if (!allocSuccess && !lowMemoryMode_) { WARN_LOG_REPORT(G3D, "Texture cache ran out of GPU memory; switching to low memory mode"); lowMemoryMode_ = true; @@ -692,6 +759,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { void *data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment); if (replaced.Valid()) { replaced.Load(i, data, stride); + entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); } else { if (fakeMipmap) { LoadTextureLevel(*entry, (uint8_t *)data, stride, level, scaleFactor, dstFmt); @@ -699,12 +767,24 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { break; } else { LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt); + if (computeUpload) { + // This format can be used with storage images. NOTE(review): the shader indexes rows by width, so this assumes stride == mipWidth * bpp - confirm.
+ VkImageView view = entry->vkTex->CreateViewForMip(i); + VkDescriptorSet descSet = upload_.GetDescriptorSet(texBuf, bufferOffset, size, view); + struct Params { int width; int height; } params{ mipWidth, mipHeight }; + vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, upload_.GetPipeline(uploadCS_)); + vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, upload_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr); + vkCmdPushConstants(cmdInit, upload_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params); + vkCmdDispatch(cmdInit, (mipWidth + 15) / 16, (mipHeight + 15) / 16, 1); + vulkan_->Delete().QueueDeleteImageView(view); + } else { + entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); + } } if (replacer_.Enabled()) { replacer_.NotifyTextureDecoded(replacedInfo, data, stride, i, mipWidth, mipHeight); } } - entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); } if (maxLevel == 0) { @@ -715,7 +795,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { if (replaced.Valid()) { entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus())); } - entry->vkTex->EndCreate(cmdInit); + entry->vkTex->EndCreate(cmdInit, false, computeUpload ?
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); diff --git a/GPU/Vulkan/TextureCacheVulkan.h b/GPU/Vulkan/TextureCacheVulkan.h index 49862fde3c..d8c3f69cd5 100644 --- a/GPU/Vulkan/TextureCacheVulkan.h +++ b/GPU/Vulkan/TextureCacheVulkan.h @@ -145,6 +145,8 @@ private: DrawEngineVulkan *drawEngine_; Vulkan2D *vulkan2D_; + VkShaderModule uploadCS_ = VK_NULL_HANDLE; + // Bound state to emulate an API similar to the others VkImageView imageView_ = VK_NULL_HANDLE; VkSampler curSampler_ = VK_NULL_HANDLE; diff --git a/GPU/Vulkan/VulkanUtil.cpp b/GPU/Vulkan/VulkanUtil.cpp index 1f14f0eefa..ab4f30a3a9 100644 --- a/GPU/Vulkan/VulkanUtil.cpp +++ b/GPU/Vulkan/VulkanUtil.cpp @@ -16,6 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include "base/basictypes.h" +#include "base/stringutil.h" #include "Common/Log.h" #include "Common/Vulkan/VulkanContext.h" #include "GPU/Vulkan/VulkanUtil.h" @@ -385,7 +386,7 @@ VkShaderModule CompileShaderModule(VulkanContext *vulkan, VkShaderStageFlagBits ERROR_LOG(G3D, "Error in shader compilation!"); } ERROR_LOG(G3D, "Messages: %s", error->c_str()); - ERROR_LOG(G3D, "Shader source:\n%s", code); + ERROR_LOG(G3D, "Shader source:\n%s", LineNumberString(code).c_str()); OutputDebugStringUTF8("Messages:\n"); OutputDebugStringUTF8(error->c_str()); return VK_NULL_HANDLE; @@ -404,7 +405,9 @@ VulkanComputeUploader::VulkanComputeUploader(VulkanContext *vulkan) : vulkan_(vu VulkanComputeUploader::~VulkanComputeUploader() {} void VulkanComputeUploader::InitDeviceObjects() { - pipelineCache_ = vulkan_->CreatePipelineCache(); + VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO }; + VkResult res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_); + assert(VK_SUCCESS == res); VkDescriptorSetLayoutBinding bindings[2] = {}; bindings[0].descriptorCount = 1; @@ -421,18 +424,18 @@
void VulkanComputeUploader::InitDeviceObjects() { VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; dsl.bindingCount = 2; dsl.pBindings = bindings; - VkResult res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_); + res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_); assert(VK_SUCCESS == res); VkDescriptorPoolSize dpTypes[2]; - dpTypes[0].descriptorCount = 300; + dpTypes[0].descriptorCount = 1024; dpTypes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - dpTypes[1].descriptorCount = 300; + dpTypes[1].descriptorCount = 1024; dpTypes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; VkDescriptorPoolCreateInfo dp = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; dp.flags = 0; // Don't want to mess around with individually freeing these, let's go fixed each frame and zap the whole array. Might try the dynamic approach later. - dp.maxSets = 300; + dp.maxSets = 1024; dp.pPoolSizes = dpTypes; dp.poolSizeCount = ARRAY_SIZE(dpTypes); for (int i = 0; i < ARRAY_SIZE(frameData_); i++) { @@ -442,7 +445,7 @@ void VulkanComputeUploader::InitDeviceObjects() { VkPushConstantRange push = {}; push.offset = 0; - push.size = 48; + push.size = 16; push.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; @@ -501,7 +504,7 @@ VkDescriptorSet VulkanComputeUploader::GetDescriptorSet(VkBuffer buffer, VkDevic writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; writes[n].dstSet = desc; n++; - imageInfo.imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL; imageInfo.imageView = image; imageInfo.sampler = VK_NULL_HANDLE; writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -526,6 +529,7 @@ VkPipeline VulkanComputeUploader::GetPipeline(VkShaderModule cs) { pci.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; pci.stage.module = cs; pci.stage.pName =
"main"; + pci.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; pci.layout = pipelineLayout_; pci.flags = 0; @@ -533,4 +537,13 @@ VkPipeline VulkanComputeUploader::GetPipeline(VkShaderModule cs) { pipelines_.Insert(key, pipeline); return pipeline; -} \ No newline at end of file +} + +void VulkanComputeUploader::BeginFrame() { + int curFrame = vulkan_->GetCurFrame(); + FrameData &frame = frameData_[curFrame]; + vkResetDescriptorPool(vulkan_->GetDevice(), frame.descPool, 0); +} + +void VulkanComputeUploader::EndFrame() { +} diff --git a/GPU/Vulkan/VulkanUtil.h b/GPU/Vulkan/VulkanUtil.h index abce1ff8a3..7e19696697 100644 --- a/GPU/Vulkan/VulkanUtil.h +++ b/GPU/Vulkan/VulkanUtil.h @@ -146,6 +146,9 @@ public: VkPipeline GetPipeline(VkShaderModule cs); VkPipelineLayout GetPipelineLayout() const { return pipelineLayout_; } + void BeginFrame(); + void EndFrame(); + private: void InitDeviceObjects(); void DestroyDeviceObjects();