Use a plain compute shader for RGBA8888 texture uploads, as a proof of concept.

This commit is contained in:
Henrik Rydgård 2017-12-11 12:22:24 +01:00 committed by Unknown W. Brackets
parent e6bec3e555
commit 9ed66001c6
6 changed files with 141 additions and 17 deletions

View file

@ -121,9 +121,10 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *all
if (initialLayout != VK_IMAGE_LAYOUT_UNDEFINED && initialLayout != VK_IMAGE_LAYOUT_PREINITIALIZED) {
switch (initialLayout) {
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
case VK_IMAGE_LAYOUT_GENERAL:
TransitionImageLayout2(cmd, image_, 0, numMips, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, VK_ACCESS_TRANSFER_WRITE_BIT);
break;
default:
@ -208,10 +209,10 @@ void VulkanTexture::GenerateMip(VkCommandBuffer cmd, int mip) {
VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
}
void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture) {
void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkImageLayout layout) {
TransitionImageLayout2(cmd, image_, 0, numMips_,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
layout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_TRANSFER_BIT, vertexTexture ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
@ -222,6 +223,27 @@ void VulkanTexture::Touch() {
}
}
// Creates a 2D color image view that exposes exactly one mip level of this texture's image
// (one array layer, same format as the image, identity component swizzle).
//
// mip: index of the mip level the view should cover.
//
// Returns the newly created view, or VK_NULL_HANDLE if vkCreateImageView fails.
// The view is not stored on this object, so the caller owns it and must delete it.
VkImageView VulkanTexture::CreateViewForMip(int mip) {
	VkImageViewCreateInfo view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
	view_info.image = image_;
	view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
	view_info.format = format_;
	// Identity swizzle on purpose: views bound as storage images must not remap components.
	view_info.components.r = VK_COMPONENT_SWIZZLE_R;
	view_info.components.g = VK_COMPONENT_SWIZZLE_G;
	view_info.components.b = VK_COMPONENT_SWIZZLE_B;
	view_info.components.a = VK_COMPONENT_SWIZZLE_A;
	view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	view_info.subresourceRange.baseMipLevel = mip;
	view_info.subresourceRange.levelCount = 1;
	view_info.subresourceRange.baseArrayLayer = 0;
	view_info.subresourceRange.layerCount = 1;

	// Start from a null handle so a failed creation can never hand back an uninitialized value
	// in builds where assert() is compiled out.
	VkImageView view = VK_NULL_HANDLE;
	VkResult res = vkCreateImageView(vulkan_->GetDevice(), &view_info, nullptr, &view);
	assert(res == VK_SUCCESS);
	if (res != VK_SUCCESS) {
		return VK_NULL_HANDLE;
	}
	return view;
}
void VulkanTexture::Destroy() {
if (view_ != VK_NULL_HANDLE) {
vulkan_->Delete().QueueDeleteImageView(view_);

View file

@ -21,7 +21,11 @@ public:
bool CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *allocator, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr);
void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels
void GenerateMip(VkCommandBuffer cmd, int mip);
void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false);
void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// When loading mips from compute shaders, you need to pass VK_IMAGE_LAYOUT_GENERAL to the above function.
// In addition, skip UploadMip and GenerateMip, and instead use CreateViewForMip. Make sure to delete the returned views once you are done with them.
VkImageView CreateViewForMip(int mip);
void Destroy();

View file

@ -67,6 +67,50 @@ static const VkComponentMapping VULKAN_1555_SWIZZLE = { VK_COMPONENT_SWIZZLE_B,
static const VkComponentMapping VULKAN_565_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
static const VkComponentMapping VULKAN_8888_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
// GLSL source of the compute shader used to upload RGBA8888 texture data.
// Interface, as declared in the shader text below:
//   binding 0      - storage buffer holding one packed 32-bit uint per pixel.
//   binding 1      - write-only rgba8 storage image that receives the pixels.
//   push constants - the width and height of the image, in pixels.
// One invocation handles one pixel, in 16x16 workgroups. The source is indexed as
// y * width + x, i.e. rows are read as tightly packed with no padding between them.
// NOTE(review): confirm that callers never pass data whose row stride differs from width.
const char *uploadShader = R"(
#version 450
#extension GL_ARB_separate_shader_objects : enable
// No idea what's optimal here...
#define WORKGROUP_SIZE 16
layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1) in;
layout(std430, binding = 0) buffer Buf {
uint pixel[];
} buf;
uniform layout(binding = 1, rgba8) writeonly image2D img;
layout(push_constant) uniform Params {
int width;
int height;
} params;
void main() {
uint x = gl_GlobalInvocationID.x;
uint y = gl_GlobalInvocationID.y;
// Kill off any out-of-image threads to avoid stray writes.
// Should only happen on the tiniest mipmaps as PSP textures are power-of-2,
// and we use a 16x16 workgroup size.
if (x >= params.width || gl_GlobalInvocationID.y >= params.height)
return;
// Note that if the pixels are packed, we can do multiple stores
// and only launch this compute shader for every N pixels,
// by slicing the width in half and multiplying x by 2, for example.
uint color = buf.pixel[y * params.width + x];
// Unpack the color (we could look it up in a CLUT here if we wanted...)
// It's a bit silly that we need to unpack to float and then have imageStore repack,
// but the alternative is to store to a buffer, and then launch a vkCmdCopyBufferToImage instead.
vec4 outColor = vec4(
(color & 0xFF) * (1.0 / 255.0),
((color >> 8) & 0xFF) * (1.0 / 255.0),
((color >> 16) & 0xFF) * (1.0 / 255.0),
((color >> 24) & 0xFF) * (1.0 / 255.0));
imageStore(img, ivec2(x,y), outColor);
}
)";
// The destructor performs the same teardown as an explicit DeviceLost() call by forwarding to it.
SamplerCache::~SamplerCache() {
DeviceLost();
}
@ -181,6 +225,7 @@ void TextureCacheVulkan::DeviceLost() {
if (samplerNearest_)
vulkan_->Delete().QueueDeleteSampler(samplerNearest_);
vulkan_->Delete().QueueDeleteShaderModule(uploadCS_);
upload_.DeviceLost();
nextTexture_ = nullptr;
@ -204,6 +249,10 @@ void TextureCacheVulkan::DeviceRestore(VulkanContext *vulkan, Draw::DrawContext
samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
vkCreateSampler(vulkan_->GetDevice(), &samp, nullptr, &samplerNearest_);
std::string error;
uploadCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, uploadShader, &error);
_dbg_assert_msg_(G3D, uploadCS_ != VK_NULL_HANDLE, "failed to compile upload shader");
upload_.DeviceRestore(vulkan);
}
@ -277,10 +326,12 @@ void TextureCacheVulkan::StartFrame() {
}
allocator_->Begin();
upload_.BeginFrame();
}
void TextureCacheVulkan::EndFrame() {
allocator_->End();
upload_.EndFrame();
if (texelsScaledThisFrame_) {
// INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
@ -602,6 +653,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
actualFmt = ToVulkanFormat(replaced.Format(0));
}
bool computeUpload = false;
{
delete entry->vkTex;
entry->vkTex = new VulkanTexture(vulkan_);
@ -626,11 +679,25 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
break;
}
VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
// If we want to use the GE debugger, we should add VK_IMAGE_USAGE_TRANSFER_SRC_BIT too...
// Compute experiment
if (actualFmt == VULKAN_8888_FORMAT) {
computeUpload = true;
}
if (computeUpload) {
usage |= VK_IMAGE_USAGE_STORAGE_BIT;
imageLayout = VK_IMAGE_LAYOUT_GENERAL;
}
char texName[128]{};
snprintf(texName, sizeof(texName), "Texture%08x", entry->addr);
image->SetTag(texName);
bool allocSuccess = image->CreateDirect(cmdInit, allocator_, w * scaleFactor, h * scaleFactor, maxLevel + 1, actualFmt, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, mapping);
bool allocSuccess = image->CreateDirect(cmdInit, allocator_, w * scaleFactor, h * scaleFactor, maxLevel + 1, actualFmt, imageLayout, usage, mapping);
if (!allocSuccess && !lowMemoryMode_) {
WARN_LOG_REPORT(G3D, "Texture cache ran out of GPU memory; switching to low memory mode");
lowMemoryMode_ = true;
@ -692,6 +759,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
void *data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
if (replaced.Valid()) {
replaced.Load(i, data, stride);
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
} else {
if (fakeMipmap) {
LoadTextureLevel(*entry, (uint8_t *)data, stride, level, scaleFactor, dstFmt);
@ -699,12 +767,24 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
break;
} else {
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt);
if (computeUpload) {
// This format can be used with storage images.
VkImageView view = entry->vkTex->CreateViewForMip(i);
VkDescriptorSet descSet = upload_.GetDescriptorSet(texBuf, bufferOffset, size, view);
struct Params { int x; int y; } params{ mipWidth, mipHeight };
vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, upload_.GetPipeline(uploadCS_));
vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, upload_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr);
vkCmdPushConstants(cmdInit, upload_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);
vkCmdDispatch(cmdInit, (mipWidth + 15) / 16, (mipHeight + 15) / 16, 1);
vulkan_->Delete().QueueDeleteImageView(view);
} else {
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
}
}
if (replacer_.Enabled()) {
replacer_.NotifyTextureDecoded(replacedInfo, data, stride, i, mipWidth, mipHeight);
}
}
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
}
if (maxLevel == 0) {
@ -715,7 +795,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
if (replaced.Valid()) {
entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
}
entry->vkTex->EndCreate(cmdInit);
entry->vkTex->EndCreate(cmdInit, false, computeUpload ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);

View file

@ -145,6 +145,8 @@ private:
DrawEngineVulkan *drawEngine_;
Vulkan2D *vulkan2D_;
VkShaderModule uploadCS_ = VK_NULL_HANDLE;
// Bound state to emulate an API similar to the others
VkImageView imageView_ = VK_NULL_HANDLE;
VkSampler curSampler_ = VK_NULL_HANDLE;

View file

@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/basictypes.h"
#include "base/stringutil.h"
#include "Common/Log.h"
#include "Common/Vulkan/VulkanContext.h"
#include "GPU/Vulkan/VulkanUtil.h"
@ -385,7 +386,7 @@ VkShaderModule CompileShaderModule(VulkanContext *vulkan, VkShaderStageFlagBits
ERROR_LOG(G3D, "Error in shader compilation!");
}
ERROR_LOG(G3D, "Messages: %s", error->c_str());
ERROR_LOG(G3D, "Shader source:\n%s", code);
ERROR_LOG(G3D, "Shader source:\n%s", LineNumberString(code).c_str());
OutputDebugStringUTF8("Messages:\n");
OutputDebugStringUTF8(error->c_str());
return VK_NULL_HANDLE;
@ -404,7 +405,9 @@ VulkanComputeUploader::VulkanComputeUploader(VulkanContext *vulkan) : vulkan_(vu
// The destructor does no work of its own.
// NOTE(review): presumably the Vulkan objects are released through DeviceLost()/DestroyDeviceObjects() before destruction - confirm.
VulkanComputeUploader::~VulkanComputeUploader() {}
void VulkanComputeUploader::InitDeviceObjects() {
pipelineCache_ = vulkan_->CreatePipelineCache();
VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO };
VkResult res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
assert(VK_SUCCESS == res);
VkDescriptorSetLayoutBinding bindings[2] = {};
bindings[0].descriptorCount = 1;
@ -421,18 +424,18 @@ void VulkanComputeUploader::InitDeviceObjects() {
VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
dsl.bindingCount = 2;
dsl.pBindings = bindings;
VkResult res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_);
res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_);
assert(VK_SUCCESS == res);
VkDescriptorPoolSize dpTypes[2];
dpTypes[0].descriptorCount = 300;
dpTypes[0].descriptorCount = 1024;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
dpTypes[1].descriptorCount = 300;
dpTypes[1].descriptorCount = 1024;
dpTypes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
VkDescriptorPoolCreateInfo dp = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
dp.flags = 0; // Don't want to mess around with individually freeing these, let's go fixed each frame and zap the whole array. Might try the dynamic approach later.
dp.maxSets = 300;
dp.maxSets = 1024;
dp.pPoolSizes = dpTypes;
dp.poolSizeCount = ARRAY_SIZE(dpTypes);
for (int i = 0; i < ARRAY_SIZE(frameData_); i++) {
@ -442,7 +445,7 @@ void VulkanComputeUploader::InitDeviceObjects() {
VkPushConstantRange push = {};
push.offset = 0;
push.size = 48;
push.size = 16;
push.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
@ -501,7 +504,7 @@ VkDescriptorSet VulkanComputeUploader::GetDescriptorSet(VkBuffer buffer, VkDevic
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[n].dstSet = desc;
n++;
imageInfo.imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
imageInfo.imageView = image;
imageInfo.sampler = VK_NULL_HANDLE;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -526,6 +529,7 @@ VkPipeline VulkanComputeUploader::GetPipeline(VkShaderModule cs) {
pci.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
pci.stage.module = cs;
pci.stage.pName = "main";
pci.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
pci.layout = pipelineLayout_;
pci.flags = 0;
@ -533,4 +537,13 @@ VkPipeline VulkanComputeUploader::GetPipeline(VkShaderModule cs) {
pipelines_.Insert(key, pipeline);
return pipeline;
}
}
void VulkanComputeUploader::BeginFrame() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frame = frameData_[curFrame];
vkResetDescriptorPool(vulkan_->GetDevice(), frame.descPool, 0);
}
// Called at the end of a frame. Currently there is nothing to do here.
void VulkanComputeUploader::EndFrame() {}

View file

@ -146,6 +146,9 @@ public:
VkPipeline GetPipeline(VkShaderModule cs);
VkPipelineLayout GetPipelineLayout() const { return pipelineLayout_; }
void BeginFrame();
void EndFrame();
private:
void InitDeviceObjects();
void DestroyDeviceObjects();