Merge pull request #9356 from xebra/hw_tess_vulkan

Hardware Tessellation on Vulkan
This commit is contained in:
Henrik Rydgård 2017-02-26 09:12:11 +01:00 committed by GitHub
commit d84f579928
12 changed files with 453 additions and 42 deletions

View file

@ -141,10 +141,19 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
const int h = gstate.getTextureHeight(0);
const float widthFactor = (float)w * invW;
const float heightFactor = (float)h * invH;
ub->uvScaleOffset[0] = widthFactor;
ub->uvScaleOffset[1] = heightFactor;
ub->uvScaleOffset[2] = 0.0f;
ub->uvScaleOffset[3] = 0.0f;
if (gstate_c.bezier || gstate_c.spline) {
// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor;
ub->uvScaleOffset[1] = gstate_c.uv.vScale * heightFactor;
ub->uvScaleOffset[2] = gstate_c.uv.uOff * widthFactor;
ub->uvScaleOffset[3] = gstate_c.uv.vOff * heightFactor;
} else {
ub->uvScaleOffset[0] = widthFactor;
ub->uvScaleOffset[1] = heightFactor;
ub->uvScaleOffset[2] = 0.0f;
ub->uvScaleOffset[3] = 0.0f;
}
}
if (dirtyUniforms & DIRTY_DEPTHRANGE) {
@ -177,6 +186,20 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
ub->depthRange[2] = viewZCenter;
ub->depthRange[3] = viewZInvScale;
}
if (gstate_c.bezier) {
if (dirtyUniforms & DIRTY_BEZIERCOUNTU)
ub->spline_count_u = gstate_c.bezier_count_u;
} else if (gstate_c.spline) {
if (dirtyUniforms & DIRTY_SPLINECOUNTU)
ub->spline_count_u = gstate_c.spline_count_u;
if (dirtyUniforms & DIRTY_SPLINECOUNTV)
ub->spline_count_v = gstate_c.spline_count_v;
if (dirtyUniforms & DIRTY_SPLINETYPEU)
ub->spline_type_u = gstate_c.spline_type_u;
if (dirtyUniforms & DIRTY_SPLINETYPEV)
ub->spline_type_v = gstate_c.spline_type_v;
}
}
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {

View file

@ -11,7 +11,8 @@ enum : uint64_t {
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERCOUNTU | DIRTY_SPLINECOUNTU | DIRTY_SPLINECOUNTV | DIRTY_SPLINETYPEU | DIRTY_SPLINETYPEV,
DIRTY_LIGHT_UNIFORMS =
DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
@ -29,6 +30,10 @@ struct UB_VS_FS_Base {
float depthRange[4];
float fogCoef_stencil[4];
float matAmbient[4];
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
// Fragment data
float fogColor[4];
float texEnvColor[4];
@ -50,6 +55,10 @@ R"( mat4 proj_mtx;
vec4 depthRange;
vec3 fogcoef_stencilreplace;
vec4 matambientalpha;
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
vec3 fogcolor;
vec3 texenv;
ivec4 alphacolorref;
@ -71,6 +80,10 @@ R"( float4x4 u_proj;
float4 u_depthRange;
float3 u_fogcoef_stencilreplace;
float4 u_matambientalpha;
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
float3 u_fogcolor;
float3 u_texenv;
uint4 u_alphacolorref;

View file

@ -41,6 +41,7 @@
#include "base/NativeApp.h"
#include "base/logging.h"
#include "profiler/profiler.h"
#include "i18n/i18n.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/MemMapHelpers.h"
#include "Core/MIPS/MIPS.h"
@ -474,6 +475,14 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// Some of our defaults are different from hw defaults, let's assert them.
// We restore each frame anyway, but here is convenient for tests.
textureCache_->NotifyConfigChanged();
if (g_Config.bHardwareTessellation) {
// Disable hardware tessellation because DX11 is still unsupported.
g_Config.bHardwareTessellation = false;
ERROR_LOG(G3D, "Hardware Tessellation is unsupported, falling back to software tessellation");
I18NCategory *gr = GetI18NCategory("Graphics");
host->NotifyUserMessage(gr->T("Turn off Hardware Tessellation - unsupported"), 2.5f, 0xFF3030FF);
}
}
GPU_D3D11::~GPU_D3D11() {

View file

@ -22,6 +22,7 @@
#include "base/NativeApp.h"
#include "base/logging.h"
#include "profiler/profiler.h"
#include "i18n/i18n.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/MemMapHelpers.h"
#include "Core/MIPS/MIPS.h"
@ -456,6 +457,14 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// We restore each frame anyway, but here is convenient for tests.
dxstate.Restore();
textureCache_->NotifyConfigChanged();
if (g_Config.bHardwareTessellation) {
// Disable hardware tessellation because DX9 is still unsupported.
g_Config.bHardwareTessellation = false;
ERROR_LOG(G3D, "Hardware Tessellation is unsupported, falling back to software tessellation");
I18NCategory *gr = GetI18NCategory("Graphics");
host->NotifyUserMessage(gr->T("Turn off Hardware Tessellation - unsupported"), 2.5f, 0xFF3030FF);
}
}
void GPU_DX9::UpdateCmdInfo() {

View file

@ -1120,7 +1120,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float
#ifndef USING_GLES2
if (isAllowTexture1D) {
// Position
glActiveTexture(GL_TEXTURE3);
glActiveTexture(GL_TEXTURE4);
glBindTexture(GL_TEXTURE_1D, data_tex[0]);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@ -1135,7 +1135,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float
// Texcoords
if (hasTexCoords) {
glActiveTexture(GL_TEXTURE4);
glActiveTexture(GL_TEXTURE5);
glBindTexture(GL_TEXTURE_1D, data_tex[1]);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@ -1150,7 +1150,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float
}
// Color
glActiveTexture(GL_TEXTURE5);
glActiveTexture(GL_TEXTURE6);
glBindTexture(GL_TEXTURE_1D, data_tex[2]);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@ -1167,7 +1167,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float
#endif
{
// Position
glActiveTexture(GL_TEXTURE3);
glActiveTexture(GL_TEXTURE4);
glBindTexture(GL_TEXTURE_2D, data_tex[0]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@ -1182,7 +1182,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float
// Texcoords
if (hasTexCoords) {
glActiveTexture(GL_TEXTURE4);
glActiveTexture(GL_TEXTURE5);
glBindTexture(GL_TEXTURE_2D, data_tex[1]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@ -1197,7 +1197,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float
}
// Color
glActiveTexture(GL_TEXTURE5);
glActiveTexture(GL_TEXTURE6);
glBindTexture(GL_TEXTURE_2D, data_tex[2]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

View file

@ -305,11 +305,11 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs,
if (u_tess_pos_tex != -1)
glUniform1i(u_tess_pos_tex, 3); // Texture unit 3
glUniform1i(u_tess_pos_tex, 4); // Texture unit 4
if (u_tess_tex_tex != -1)
glUniform1i(u_tess_tex_tex, 4); // Texture unit 4
glUniform1i(u_tess_tex_tex, 5); // Texture unit 5
if (u_tess_col_tex != -1)
glUniform1i(u_tess_col_tex, 5); // Texture unit 5
glUniform1i(u_tess_col_tex, 6); // Texture unit 6
// The rest, use the "dirty" mechanism.
dirtyUniforms = DIRTY_ALL_UNIFORMS;

View file

@ -488,7 +488,7 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else if (doSpline) {
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
@ -527,7 +527,7 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else if (doSpline) {
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0.0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0.0, normal.y);\n");
// Right

View file

@ -55,6 +55,9 @@ enum {
DRAW_BINDING_DYNUBO_BASE = 2,
DRAW_BINDING_DYNUBO_LIGHT = 3,
DRAW_BINDING_DYNUBO_BONE = 4,
DRAW_BINDING_TESS_POS_TEXTURE = 5,
DRAW_BINDING_TESS_TEX_TEXTURE = 6,
DRAW_BINDING_TESS_COL_TEXTURE = 7,
};
enum {
@ -87,12 +90,12 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan)
InitDeviceObjects();
tessDataTransfer = new TessellationDataTransferVulkan();
tessDataTransfer = new TessellationDataTransferVulkan(vulkan);
}
void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
VkDescriptorSetLayoutBinding bindings[5];
VkDescriptorSetLayoutBinding bindings[8];
bindings[0].descriptorCount = 1;
bindings[0].pImmutableSamplers = nullptr;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -118,11 +121,27 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[4].binding = DRAW_BINDING_DYNUBO_BONE;
// Hardware tessellation
bindings[5].descriptorCount = 1;
bindings[5].pImmutableSamplers = nullptr;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[5].binding = DRAW_BINDING_TESS_POS_TEXTURE;
bindings[6].descriptorCount = 1;
bindings[6].pImmutableSamplers = nullptr;
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[6].binding = DRAW_BINDING_TESS_TEX_TEXTURE;
bindings[7].descriptorCount = 1;
bindings[7].pImmutableSamplers = nullptr;
bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[7].binding = DRAW_BINDING_TESS_COL_TEXTURE;
VkDevice device = vulkan_->GetDevice();
VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
dsl.bindingCount = 5;
dsl.bindingCount = 8;
dsl.pBindings = bindings;
VkResult res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_);
assert(VK_SUCCESS == res);
@ -512,9 +531,11 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
assert(bone != VK_NULL_HANDLE);
FrameData *frame = &frame_[curFrame_ & 1];
auto iter = frame->descSets.find(key);
if (iter != frame->descSets.end()) {
return iter->second;
if (!(gstate_c.bezier || gstate_c.spline)) { // Has no cache when HW tessellation.
auto iter = frame->descSets.find(key);
if (iter != frame->descSets.end()) {
return iter->second;
}
}
// Didn't find one in the frame descriptor set cache, let's make a new one.
@ -530,7 +551,7 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
assert(result == VK_SUCCESS);
// We just don't write to the slots we don't care about.
VkWriteDescriptorSet writes[4];
VkWriteDescriptorSet writes[7];
memset(writes, 0, sizeof(writes));
// Main texture
int n = 0;
@ -551,6 +572,30 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
}
// Skipping 2nd texture for now.
// Tessellation data textures
if (gstate_c.bezier || gstate_c.spline) {
VkDescriptorImageInfo tess_tex[3];
VkSampler sampler = ((TessellationDataTransferVulkan *)tessDataTransfer)->GetSampler();
for (int i = 0; i < 3; i++) {
VulkanTexture *texture = ((TessellationDataTransferVulkan *)tessDataTransfer)->GetTexture(i);
VkImageView imageView = texture->GetImageView();
if (i == 0 || imageView) {
tess_tex[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tess_tex[i].imageView = imageView;
tess_tex[i].sampler = sampler;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_TESS_POS_TEXTURE + i;
writes[n].pImageInfo = &tess_tex[i];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].dstSet = desc;
n++;
}
}
}
// Uniform buffer objects
VkDescriptorBufferInfo buf[3];
int count = 0;
@ -580,7 +625,8 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
vkUpdateDescriptorSets(vulkan_->GetDevice(), n, writes, 0, nullptr);
frame->descSets[key] = desc;
if (!(gstate_c.bezier || gstate_c.spline)) // Avoid caching when HW tessellation.
frame->descSets[key] = desc;
return desc;
}
@ -721,7 +767,8 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
// TODO: Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments
vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets);
vkCmdBindIndexBuffer(cmd_, ibuf, ibOffset, VK_INDEX_TYPE_UINT16);
vkCmdDrawIndexed(cmd_, vertexCount, 1, 0, 0, 0);
int numInstances = (gstate_c.bezier || gstate_c.spline) ? numPatches : 1;
vkCmdDrawIndexed(cmd_, vertexCount, numInstances, 0, 0, 0);
} else {
vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets);
vkCmdDraw(cmd_, vertexCount, 1, 0, 0);
@ -900,6 +947,40 @@ bool DrawEngineVulkan::IsCodePtrVertexDecoder(const u8 *ptr) const {
return decJitCache_->IsInSpace(ptr);
}
void DrawEngineVulkan::TessellationDataTransferVulkan::SendDataToShader(const float * pos, const float * tex, const float * col, int size, bool hasColor, bool hasTexCoords)
{
void DrawEngineVulkan::TessellationDataTransferVulkan::SendDataToShader(const float * pos, const float * tex, const float * col, int size, bool hasColor, bool hasTexCoords) {
int rowPitch;
u8 *data;
// Position
if (prevSize < size) {
prevSize = size;
data_tex[0]->CreateDirect(size, 1, 1, VK_FORMAT_R32G32B32_SFLOAT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
data = data_tex[0]->Lock(0, &rowPitch);
memcpy(data, pos, size * 3 * sizeof(float));
data_tex[0]->Unlock();
// Texcoords
if (hasTexCoords) {
if (prevSizeTex < size) {
prevSizeTex = size;
data_tex[1]->CreateDirect(size, 1, 1, VK_FORMAT_R32G32B32_SFLOAT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
data = data_tex[1]->Lock(0, &rowPitch);
memcpy(data, tex, size * 3 * sizeof(float));
data_tex[1]->Unlock();
}
// Color
int sizeColor = hasColor ? size : 1;
if (prevSizeCol < sizeColor) {
prevSizeCol = sizeColor;
data_tex[2]->CreateDirect(sizeColor, 1, 1, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
data = data_tex[2]->Lock(0, &rowPitch);
memcpy(data, col, sizeColor * 4 * sizeof(float));
data_tex[2]->Unlock();
}

View file

@ -245,12 +245,53 @@ private:
// Hardware tessellation
class TessellationDataTransferVulkan : public TessellationDataTransfer {
private:
int data_tex[3];
VulkanContext *vulkan;
VulkanTexture *data_tex[3];
VkSampler sampler;
public:
TessellationDataTransferVulkan() : TessellationDataTransfer(), data_tex() {
// Builds the transfer object: allocates one VulkanTexture per data slot and
// creates the sampler that is handed out through GetSampler().
TessellationDataTransferVulkan(VulkanContext *vulkan)
	: TessellationDataTransfer(), vulkan(vulkan), data_tex(), sampler() {
	for (VulkanTexture *&slot : data_tex)
		slot = new VulkanTexture(vulkan);
	CreateSampler();
}
// Frees the three texture objects allocated in the constructor and hands the
// sampler to the context's delete queue.
~TessellationDataTransferVulkan() {
	for (VulkanTexture *slot : data_tex)
		delete slot;
	vulkan->Delete().QueueDeleteSampler(sampler);
}
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
// Returns the data texture held in slot i (0 = position, 1 = texcoord, 2 = color).
// No bounds check is performed on i.
VulkanTexture *GetTexture(int i) const { return data_tex[i]; }
// Returns the sampler handle created by CreateSampler().
VkSampler GetSampler() const { return sampler; }
void CreateSampler() {
VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
samp.compareOp = VK_COMPARE_OP_NEVER;
samp.flags = 0;
samp.magFilter =VK_FILTER_NEAREST;
samp.minFilter = VK_FILTER_NEAREST;
samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
if (gstate_c.Supports(GPU_SUPPORTS_ANISOTROPY) && g_Config.iAnisotropyLevel > 0) {
// Docs say the min of this value and the supported max are used.
samp.maxAnisotropy = 1 << g_Config.iAnisotropyLevel;
samp.anisotropyEnable = true;
} else {
samp.maxAnisotropy = 1.0f;
samp.anisotropyEnable = false;
}
samp.maxLod = 1.0f;
samp.minLod = 0.0f;
samp.mipLodBias = 0.0f;
VkResult res = vkCreateSampler(vulkan->GetDevice(), &samp, nullptr, &sampler);
assert(res == VK_SUCCESS);
}
};
};

View file

@ -876,8 +876,19 @@ void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) {
bool computeNormals = gstate.isLightingEnabled();
bool patchFacing = gstate.patchfacing & 1;
int bytesRead = 0;
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
gstate_c.bezier = true;
if (gstate_c.bezier_count_u != bz_ucount) {
gstate_c.Dirty(DIRTY_BEZIERCOUNTU);
gstate_c.bezier_count_u = bz_ucount;
}
}
drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
gstate_c.bezier = false;
// After drawing, we advance pointers - see SubmitPrim which does the same.
int count = bz_ucount * bz_vcount;
AdvanceVerts(gstate.vertType, count, bytesRead);
@ -923,9 +934,32 @@ void GPU_Vulkan::Execute_Spline(u32 op, u32 diff) {
bool computeNormals = gstate.isLightingEnabled();
bool patchFacing = gstate.patchfacing & 1;
u32 vertType = gstate.vertType;
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
gstate_c.spline = true;
if (gstate_c.spline_count_u != sp_ucount) {
gstate_c.Dirty(DIRTY_SPLINECOUNTU);
gstate_c.spline_count_u = sp_ucount;
}
if (gstate_c.spline_count_v != sp_vcount) {
gstate_c.Dirty(DIRTY_SPLINECOUNTV);
gstate_c.spline_count_v = sp_vcount;
}
if (gstate_c.spline_type_u != sp_utype) {
gstate_c.Dirty(DIRTY_SPLINETYPEU);
gstate_c.spline_type_u = sp_utype;
}
if (gstate_c.spline_type_v != sp_vtype) {
gstate_c.Dirty(DIRTY_SPLINETYPEV);
gstate_c.spline_type_v = sp_vtype;
}
}
int bytesRead = 0;
drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
gstate_c.spline = false;
// After drawing, we advance pointers - see SubmitPrim which does the same.
int count = sp_ucount * sp_vcount;
AdvanceVerts(gstate.vertType, count, bytesRead);

View file

@ -130,6 +130,12 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
bool doBezier = id.Bit(VS_BIT_BEZIER);
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
// The uniforms are passed in as three "clumps" that may or may not be present.
// We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts
// are present and what parts aren't, but we will not be ultra detailed about it.
@ -212,6 +218,81 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
WRITE(p, "}\n\n");
}
WRITE(p, "out gl_PerVertex { vec4 gl_Position; };\n");
if (doBezier || doSpline) {
WRITE(p, "layout (binding = 5) uniform sampler2D u_tess_pos_tex;\n");
WRITE(p, "layout (binding = 6) uniform sampler2D u_tess_tex_tex;\n");
WRITE(p, "layout (binding = 7) uniform sampler2D u_tess_col_tex;\n");
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
WRITE(p, " vec%d pos = vec%d(0);\n", i, i);
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
WRITE(p, " if (f != 0)\n");
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " return pos;\n");
WRITE(p, "}\n");
}
if (doSpline) {
WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
WRITE(p, " knot[i] = vec2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 1) != 0) {\n");
WRITE(p, " if (patch_pos.x <= 2)\n");
WRITE(p, " knot[0].x = 0;\n");
WRITE(p, " if (patch_pos.x <= 1)\n");
WRITE(p, " knot[1].x = 0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 2) != 0) {\n");
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
WRITE(p, " knot[5].x = num_patches.x;\n");
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
WRITE(p, " knot[4].x = num_patches.x;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 1) != 0) {\n");
WRITE(p, " if (patch_pos.y <= 2)\n");
WRITE(p, " knot[0].y = 0;\n");
WRITE(p, " if (patch_pos.y <= 1)\n");
WRITE(p, " knot[1].y = 0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 2) != 0) {\n");
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
WRITE(p, " knot[5].y = num_patches.y;\n");
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
WRITE(p, " knot[4].y = num_patches.y;\n");
WRITE(p, " }\n");
WRITE(p, "}\n");
WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
WRITE(p, " vec2 t0 = (t - knot[0]);\n");
WRITE(p, " vec2 t1 = (t - knot[1]);\n");
WRITE(p, " vec2 t2 = (t - knot[2]);\n");
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n");
WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n");
WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n");
WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n");
WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n");
WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n");
WRITE(p, " vec2 a = (1 - f30)*(1 - f31);\n");
WRITE(p, " vec2 b = (f31*f41);\n");
WRITE(p, " vec2 c = (1 - f41)*(1 - f42);\n");
WRITE(p, " vec2 d = (f42*f52);\n");
WRITE(p, " weights[0] = a - (a*f32);\n");
WRITE(p, " weights[1] = 1 - a - b + ((a + b + c - 1)*f32);\n");
WRITE(p, " weights[2] = b + ((1 - b - c - d)*f32);\n");
WRITE(p, " weights[3] = d*f32;\n");
WRITE(p, "}\n");
}
}
WRITE(p, "void main() {\n");
if (!useHWTransform) {
@ -248,12 +329,112 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(base.spline_count_u - 1) / 3" : "base.spline_count_u - 3");
WRITE(p, " int u = int(mod(gl_InstanceIndex, num_patches_u));\n");
WRITE(p, " int v = gl_InstanceIndex / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int idx = (i + v%s) * base.spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " ivec2 index = ivec2(idx, 0);\n");
WRITE(p, " _pos[i * 4 + j] = texelFetch(u_tess_pos_tex, index, 0).xyz;\n");
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = texelFetch(u_tess_tex_tex, index, 0).xy;\n");
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = texelFetch(u_tess_col_tex, index, 0).rgba;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(base.spline_count_u - 3, base.spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(base.spline_type_u, base.spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = texelFetch(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n");
}
if (hasNormal) {
// Curved surfaces probably always need their normals computed (not sampled from the control points)
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
static const char *rescale[4] = { "", " * 1.9921875", " * 1.999969482421875", "" }; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescale[boneWeightScale];
@ -298,6 +479,11 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
const char *ambientStr = ((matUpdate & 1) && hasColor) ? "color0" : "base.matambientalpha";
const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse";
const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb";
if (doBezier || doSpline) {
ambientStr = (matUpdate & 1) && hasColor ? "col" : "base.matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "light.matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "light.matspecular.rgb";
}
bool diffuseIsZero = true;
bool specularIsZero = true;
@ -416,7 +602,10 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
} else {
// Lighting doesn't affect color.
if (hasColor) {
WRITE(p, " v_color0 = color0;\n");
if (doBezier || doSpline)
WRITE(p, " v_color0 = col;\n");
else
WRITE(p, " v_color0 = color0;\n");
} else {
WRITE(p, " v_color0 = base.matambientalpha;\n");
}
@ -434,13 +623,19 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it. Treating as coords works.
if (scaleUV) {
if (hasTexcoord) {
WRITE(p, " v_texcoord = vec3(texcoord.xy, 0.0);\n");
if (doBezier || doSpline)
WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy, 0.0);\n");
} else {
WRITE(p, " v_texcoord = vec3(0.0);\n");
}
} else {
if (hasTexcoord) {
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
if (doBezier || doSpline)
WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
} else {
WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n");
}

View file

@ -83,7 +83,7 @@ bool GameSettingsScreen::UseVerticalLayout() const {
// This needs before run CheckGPUFeatures()
// TODO: Remove this if fix the issue
bool CheckSupportInstancedTessellation() {
bool CheckSupportInstancedTessellationGLES() {
#if PPSSPP_PLATFORM(UWP)
return true;
#else
@ -312,7 +312,9 @@ void GameSettingsScreen::CreateViews() {
bezierChoiceDisable_ = g_Config.bSoftwareRendering || g_Config.bHardwareTessellation;
return UI::EVENT_CONTINUE;
});
tessHWEnable_ = CheckSupportInstancedTessellation() && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
bool isBackendSupportHWTess = g_Config.iGPUBackend == GPU_BACKEND_OPENGL || g_Config.iGPUBackend == GPU_BACKEND_VULKAN;
bool isDeviceSupportInstTess = g_Config.iGPUBackend == GPU_BACKEND_OPENGL ? CheckSupportInstancedTessellationGLES() : isBackendSupportHWTess;
tessHWEnable_ = isDeviceSupportInstTess && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
tessellationHW->SetEnabledPtr(&tessHWEnable_);
// In case we're going to add few other antialiasing option like MSAA in the future.
@ -752,13 +754,17 @@ UI::EventReturn GameSettingsScreen::OnSoftwareRendering(UI::EventParams &e) {
postProcEnable_ = !g_Config.bSoftwareRendering && (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE);
resolutionEnable_ = !g_Config.bSoftwareRendering && (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE);
bezierChoiceDisable_ = g_Config.bSoftwareRendering || g_Config.bHardwareTessellation;
tessHWEnable_ = CheckSupportInstancedTessellation() && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
bool isBackendSupportHWTess = g_Config.iGPUBackend == GPU_BACKEND_OPENGL || g_Config.iGPUBackend == GPU_BACKEND_VULKAN;
bool isDeviceSupportInstTess = g_Config.iGPUBackend == GPU_BACKEND_OPENGL ? CheckSupportInstancedTessellationGLES() : isBackendSupportHWTess;
tessHWEnable_ = isDeviceSupportInstTess && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
return UI::EVENT_DONE;
}
UI::EventReturn GameSettingsScreen::OnHardwareTransform(UI::EventParams &e) {
vtxCacheEnable_ = !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
tessHWEnable_ = CheckSupportInstancedTessellation() && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
bool isBackendSupportHWTess = g_Config.iGPUBackend == GPU_BACKEND_OPENGL || g_Config.iGPUBackend == GPU_BACKEND_VULKAN;
bool isDeviceSupportInstTess = g_Config.iGPUBackend == GPU_BACKEND_OPENGL ? CheckSupportInstancedTessellationGLES() : isBackendSupportHWTess;
tessHWEnable_ = isDeviceSupportInstTess && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
return UI::EVENT_DONE;
}