diff --git a/Common/GPU/Shader.cpp b/Common/GPU/Shader.cpp index 5033e55ac1..ac90a9f404 100644 --- a/Common/GPU/Shader.cpp +++ b/Common/GPU/Shader.cpp @@ -33,6 +33,7 @@ ShaderLanguageDesc::ShaderLanguageDesc(ShaderLanguage lang) { void ShaderLanguageDesc::Init(ShaderLanguage lang) { shaderLanguage = lang; + strcpy(driverInfo, ""); switch (lang) { case GLSL_1xx: // Just used in the shader test, and as a basis for the others in DetectShaderLanguage. diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index 062d9ed2d3..da3a48e5c3 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -14,7 +14,6 @@ const char * const vulkan_glsl_preamble_fs = "#extension GL_ARB_shader_image_load_store : enable\n" "#define splat3(x) vec3(x)\n" "#define DISCARD discard\n" -"precision lowp float;\n" "precision highp int;\n" "\n"; @@ -125,12 +124,13 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions F("%s\n", gl_extensions[i]); } // Print some system info - useful to gather information directly from screenshots. - F("// %s\n", lang_.driverInfo); + if (strlen(lang_.driverInfo) != 0) { + F("// Driver: %s\n", lang_.driverInfo); + } switch (stage_) { case ShaderStage::Fragment: C("#define DISCARD discard\n"); if (lang_.gles) { - C("precision lowp float;\n"); if (lang_.glslES30) { C("precision highp int;\n"); } @@ -318,9 +318,27 @@ void ShaderWriter::HighPrecisionFloat() { } } +void ShaderWriter::LowPrecisionFloat() { + if ((ShaderLanguageIsOpenGL(lang_.shaderLanguage) && lang_.gles) || lang_.shaderLanguage == GLSL_VULKAN) { + C("precision lowp float;\n"); + } +} + +void ShaderWriter::ConstFloat(const char *name, float value) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + case HLSL_D3D9: + F("static const float %s = %f;\n", name, value); + break; + default: + F("#define %s %f\n", name, value); + break; + } +} + void ShaderWriter::DeclareSamplers(Slice samplers) { for (int i = 0; i < (int)samplers.size(); i++) { - DeclareTexture2D(samplers[i].name,i); + DeclareTexture2D(samplers[i].name, i); DeclareSampler2D(samplers[i].name, i); } } @@ -347,24 +365,24 @@ void ShaderWriter::DeclareSampler2D(const char *name, int binding) { // We only use separate samplers in HLSL D3D11, where we have no choice. switch (lang_.shaderLanguage) { case HLSL_D3D11: - F("SamplerState %s : register(s%d);\n", name, binding); + F("SamplerState %sSamp : register(s%d);\n", name, binding); break; default: break; } } -ShaderWriter &ShaderWriter::SampleTexture2D(const char *texName, const char *samplerName, const char *uv) { +ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv) { switch (lang_.shaderLanguage) { case HLSL_D3D11: - F("%s.Sample(%s, %s)", texName, samplerName, uv); + F("%s.Sample(%sSamp, %s)", sampName, sampName, uv); break; case HLSL_D3D9: - F("tex2D(%s, %s)", texName, uv); + F("tex2D(%s, %s)", sampName, uv); break; default: // Note: we ignore the sampler. make sure you bound samplers to the textures correctly. - F("%s(%s, %s)", lang_.texture, texName, uv); + F("%s(%s, %s)", lang_.texture, sampName, uv); break; } return *this; diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h index ff00740108..0624a8aca9 100644 --- a/Common/GPU/ShaderWriter.h +++ b/Common/GPU/ShaderWriter.h @@ -41,6 +41,7 @@ public: ShaderWriter(char *buffer, const ShaderLanguageDesc &lang, ShaderStage stage, const char **gl_extensions, size_t num_gl_extensions) : p_(buffer), lang_(lang), stage_(stage) { Preamble(gl_extensions, num_gl_extensions); } + ShaderWriter(const ShaderWriter &) = delete; // I tried to call all three write functions "W", but only MSVC // managed to disentangle the ambiguities, so had to give up on that. @@ -64,17 +65,20 @@ public: // F: Formats into the buffer. ShaderWriter &F(const char *format, ...); + ShaderWriter &endl() { + return C("\n"); + } + // Useful for fragment shaders in GLES. // We always default integers to high precision. void HighPrecisionFloat(); - - // Several of the shader languages ignore samplers, beware of that. - void DeclareSampler2D(const char *name, int binding); - void DeclareTexture2D(const char *name, int binding); + void LowPrecisionFloat(); void DeclareSamplers(Slice samplers); - ShaderWriter &SampleTexture2D(const char *texName, const char *samplerName, const char *uv); + void ConstFloat(const char *name, float value); + + ShaderWriter &SampleTexture2D(const char *sampName, const char *uv); // Simple shaders with no special tricks. void BeginVSMain(Slice inputs, Slice uniforms, Slice varyings); @@ -95,6 +99,10 @@ public: } private: + // Several of the shader languages ignore samplers, beware of that. + void DeclareSampler2D(const char *name, int binding); + void DeclareTexture2D(const char *name, int binding); + void Preamble(const char **gl_extensions, size_t num_gl_extensions); char *p_; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index c1d05acb38..aa55b4e707 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -1293,7 +1293,7 @@ ShaderModule *VKContext::CreateShaderModule(ShaderStage stage, ShaderLanguage la if (shader->Compile(vulkan_, language, data, size)) { return shader; } else { - ERROR_LOG(G3D, "Failed to compile shader:\n%s", (const char *)data); + ERROR_LOG(G3D, "Failed to compile shader:\n%s", (const char *)LineNumberString((const char *)data).c_str()); shader->Release(); return nullptr; } diff --git a/GPU/Common/DepalettizeCommon.cpp b/GPU/Common/DepalettizeCommon.cpp index 9da13d38ea..85c74d4c55 100644 --- a/GPU/Common/DepalettizeCommon.cpp +++ b/GPU/Common/DepalettizeCommon.cpp @@ -28,13 +28,8 @@ #include "GPU/Common/DepalettizeShaderCommon.h" #include "GPU/Common/DepalettizeCommon.h" -static const InputDef vsInputs[2] = { - { "vec2", "a_position", Draw::SEM_POSITION, }, - { "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0, }, -}; - static const VaryingDef varyings[1] = { - { "vec2", "v_texcoord0", Draw::SEM_TEXCOORD0, 0, "highp" }, + { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, }; static const SamplerDef samplers[2] = { @@ -56,18 +51,6 @@ void DepalShaderCache::DeviceLost() { Clear(); } -bool DepalShaderCache::GenerateVertexShader(char *buffer, const ShaderLanguageDesc &lang) { - ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0); - writer.BeginVSMain(vsInputs, Slice::empty(), varyings); - writer.C(" v_texcoord0 = a_texcoord0;\n"); - writer.C(" gl_Position = vec4(a_position, 0.0, 1.0);\n"); - if (strlen(lang.viewportYSign)) { - writer.F(" gl_Position.y *= %s1.0;\n", lang.viewportYSign); - } - writer.EndVSMain(varyings); - return true; -} - Draw::Texture *DepalShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) { u32 clutId = GetClutID(clutFormat, clutHash); @@ -178,16 +161,19 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferF char *buffer = new char[4096]; if (!vertexShader_) { - if (!GenerateVertexShader(buffer, draw_->GetShaderLanguageDesc())) { - // The vertex shader failed, no need to bother trying the fragment. - delete[] buffer; - return nullptr; - } + GenerateDepalVs(buffer, draw_->GetShaderLanguageDesc()); vertexShader_ = draw_->CreateShaderModule(ShaderStage::Vertex, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_vs"); } - // TODO: Replace with ShaderWriter-based implementation. - GenerateDepalShader(buffer, pixelFormat, draw_->GetShaderLanguageDesc().shaderLanguage); + // TODO: Parse these out of clutMode some nice way, to become a bit more stateless. + DepalConfig config; + config.clutFormat = gstate.getClutPaletteFormat(); + config.startPos = gstate.getClutIndexStartPos(); + config.shift = gstate.getClutIndexShift(); + config.mask = gstate.getClutIndexMask(); + config.pixelFormat = pixelFormat; + + GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); std::string src(buffer); ShaderModule *fragShader = draw_->CreateShaderModule(ShaderStage::Fragment, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_fs"); diff --git a/GPU/Common/DepalettizeCommon.h b/GPU/Common/DepalettizeCommon.h index af90da8015..c394f47eb7 100644 --- a/GPU/Common/DepalettizeCommon.h +++ b/GPU/Common/DepalettizeCommon.h @@ -62,9 +62,6 @@ public: void DeviceLost(); void DeviceRestore(Draw::DrawContext *draw); - // Exposed for testing. - static bool GenerateVertexShader(char *buffer, const ShaderLanguageDesc &lang); - private: static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) { return (clutMode & 0xFFFFFF) | (pixelFormat << 24); diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 064e3fce1c..ad53268760 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -17,83 +17,44 @@ #include -#include "Common/GPU/OpenGL/GLFeatures.h" - #include "Common/GPU/Shader.h" #include "Common/GPU/ShaderWriter.h" -#include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderCommon.h" #include "Common/StringUtils.h" #include "Common/Log.h" #include "Core/Reporting.h" -#include "GPU/GPUState.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/DepalettizeShaderCommon.h" #define WRITE p+=sprintf +static const InputDef vsInputs[2] = { + { "vec2", "a_position", Draw::SEM_POSITION, }, + { "vec2", "a_texcoord", Draw::SEM_TEXCOORD0, }, +}; + +// TODO: Deduplicate with DepalettizeCommon.cpp +static const SamplerDef samplers[2] = { + { "tex" }, + { "pal" }, +}; + +static const VaryingDef varyings[1] = { + { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, +}; + // Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well. -void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) { - char *p = buffer; - if (language == HLSL_D3D11) { - WRITE(p, "SamplerState texSamp : register(s0);\n"); - WRITE(p, "Texture2D tex : register(t0);\n"); - WRITE(p, "Texture2D pal : register(t1);\n"); - // Support for depth. - if (pixelFormat == GE_FORMAT_DEPTH16) { - DepthScaleFactors factors = GetDepthScaleFactors(); - WRITE(p, "static const float z_scale = %f;\n", factors.scale); - WRITE(p, "static const float z_offset = %f;\n", factors.offset); - } - } else if (language == GLSL_VULKAN) { - WRITE(p, "#version 450\n"); - WRITE(p, "#extension GL_ARB_separate_shader_objects : enable\n"); - WRITE(p, "#extension GL_ARB_shading_language_420pack : enable\n"); - WRITE(p, "layout(set = 0, binding = 1) uniform sampler2D tex;\n"); - WRITE(p, "layout(set = 0, binding = 2) uniform sampler2D pal;\n"); - WRITE(p, "layout(location = 0) in vec2 v_texcoord0;\n"); - WRITE(p, "layout(location = 0) out vec4 fragColor0;\n"); +void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { + const int shift = config.shift; + const int mask = config.mask; - // Support for depth. - if (pixelFormat == GE_FORMAT_DEPTH16) { - DepthScaleFactors factors = GetDepthScaleFactors(); - WRITE(p, "const float z_scale = %f;\n", factors.scale); - WRITE(p, "const float z_offset = %f;\n", factors.offset); - } - } else { - if (gl_extensions.IsGLES) { - WRITE(p, "#version 300 es\n"); - WRITE(p, "precision mediump float;\n"); - WRITE(p, "precision highp int;\n"); - } else { - WRITE(p, "#version %d\n", gl_extensions.GLSLVersion()); - } - WRITE(p, "in vec2 v_texcoord0;\n"); - WRITE(p, "out vec4 fragColor0;\n"); - WRITE(p, "uniform sampler2D tex;\n"); - WRITE(p, "uniform sampler2D pal;\n"); - - if (pixelFormat == GE_FORMAT_DEPTH16) { - DepthScaleFactors factors = GetDepthScaleFactors(); - WRITE(p, "const float z_scale = %f;\n", factors.scale); - WRITE(p, "const float z_offset = %f;\n", factors.offset); - } + if (config.pixelFormat == GE_FORMAT_DEPTH16) { + DepthScaleFactors factors = GetDepthScaleFactors(); + writer.ConstFloat("z_scale", factors.scale); + writer.ConstFloat("z_offset", factors.offset); } - if (language == HLSL_D3D11) { - WRITE(p, "float4 main(in float2 v_texcoord0 : TEXCOORD0) : SV_Target {\n"); - WRITE(p, " float4 color = tex.Sample(texSamp, v_texcoord0);\n"); - } else { - WRITE(p, "void main() {\n"); - WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n"); - } - - int mask = gstate.getClutIndexMask(); - int shift = gstate.getClutIndexShift(); - int offset = gstate.getClutIndexStartPos(); - GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - // Sampling turns our texture into floating point. To avoid this, might be able // to declare them as isampler2D objects, but these require integer textures, which needs more work. // Anyhow, we simply work around this by converting back to integer, which is fine. @@ -107,87 +68,77 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang // An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such // as those that Test Drive uses for its color remapping. But would need game specific flagging. + writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); + int shiftedMask = mask << shift; - switch (pixelFormat) { + switch (config.pixelFormat) { case GE_FORMAT_8888: - if (shiftedMask & 0xFF) WRITE(p, " int r = int(color.r * 255.99);\n"); else WRITE(p, " int r = 0;\n"); - if (shiftedMask & 0xFF00) WRITE(p, " int g = int(color.g * 255.99);\n"); else WRITE(p, " int g = 0;\n"); - if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(color.b * 255.99);\n"); else WRITE(p, " int b = 0;\n"); - if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(color.a * 255.99);\n"); else WRITE(p, " int a = 0;\n"); - WRITE(p, " int index = (a << 24) | (b << 16) | (g << 8) | (r);\n"); + if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n"); + if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n"); + if (shiftedMask & 0xFF0000) writer.C(" int b = int(color.b * 255.99);\n"); else writer.C(" int b = 0;\n"); + if (shiftedMask & 0xFF000000) writer.C(" int a = int(color.a * 255.99);\n"); else writer.C(" int a = 0;\n"); + writer.C(" int index = (a << 24) | (b << 16) | (g << 8) | (r);\n"); break; case GE_FORMAT_4444: - if (shiftedMask & 0xF) WRITE(p, " int r = int(color.r * 15.99);\n"); else WRITE(p, " int r = 0;\n"); - if (shiftedMask & 0xF0) WRITE(p, " int g = int(color.g * 15.99);\n"); else WRITE(p, " int g = 0;\n"); - if (shiftedMask & 0xF00) WRITE(p, " int b = int(color.b * 15.99);\n"); else WRITE(p, " int b = 0;\n"); - if (shiftedMask & 0xF000) WRITE(p, " int a = int(color.a * 15.99);\n"); else WRITE(p, " int a = 0;\n"); - WRITE(p, " int index = (a << 12) | (b << 8) | (g << 4) | (r);\n"); + if (shiftedMask & 0xF) writer.C(" int r = int(color.r * 15.99);\n"); else writer.C(" int r = 0;\n"); + if (shiftedMask & 0xF0) writer.C(" int g = int(color.g * 15.99);\n"); else writer.C(" int g = 0;\n"); + if (shiftedMask & 0xF00) writer.C(" int b = int(color.b * 15.99);\n"); else writer.C(" int b = 0;\n"); + if (shiftedMask & 0xF000) writer.C(" int a = int(color.a * 15.99);\n"); else writer.C(" int a = 0;\n"); + writer.C(" int index = (a << 12) | (b << 8) | (g << 4) | (r);\n"); break; case GE_FORMAT_565: - if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n"); - if (shiftedMask & 0x7E0) WRITE(p, " int g = int(color.g * 63.99);\n"); else WRITE(p, " int g = 0;\n"); - if (shiftedMask & 0xF800) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n"); - WRITE(p, " int index = (b << 11) | (g << 5) | (r);\n"); + if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n"); + if (shiftedMask & 0x7E0) writer.C(" int g = int(color.g * 63.99);\n"); else writer.C(" int g = 0;\n"); + if (shiftedMask & 0xF800) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n"); + writer.C(" int index = (b << 11) | (g << 5) | (r);\n"); break; case GE_FORMAT_5551: - if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n"); - if (shiftedMask & 0x3E0) WRITE(p, " int g = int(color.g * 31.99);\n"); else WRITE(p, " int g = 0;\n"); - if (shiftedMask & 0x7C00) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n"); - if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n"); - WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n"); + if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n"); + if (shiftedMask & 0x3E0) writer.C(" int g = int(color.g * 31.99);\n"); else writer.C(" int g = 0;\n"); + if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n"); + if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n"); + writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n"); break; case GE_FORMAT_DEPTH16: // Remap depth buffer. - WRITE(p, " float depth = (color.x - z_offset) * z_scale;\n"); - WRITE(p, " int index = int(clamp(depth, 0.0, 65535.0));\n"); + writer.C(" float depth = (color.x - z_offset) * z_scale;\n"); + writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n"); break; default: break; } - float texturePixels = 256; - if (clutFormat != GE_CMODE_32BIT_ABGR8888) { - texturePixels = 512; + float texturePixels = 256.0f; + if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) { + texturePixels = 512.0f; } if (shift) { - WRITE(p, " index = (int(uint(index) >> uint(%i)) & 0x%02x)", shift, mask); + writer.F(" index = (int(uint(index) >> uint(%d)) & 0x%02x)", shift, mask); } else { - WRITE(p, " index = (index & 0x%02x)", mask); + writer.F(" index = (index & 0x%02x)", mask); } - if (offset) { - WRITE(p, " | %i;\n", offset); // '|' matches what we have in gstate.h + if (config.startPos) { + writer.F(" | %d;\n", config.startPos); // '|' matches what we have in gstate.h } else { - WRITE(p, ";\n"); + writer.F(";\n"); } - if (language == HLSL_D3D11) { - WRITE(p, " return pal.Load(int3(index, 0, 0));\n"); - } else { - WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels); - } - WRITE(p, "}\n"); + writer.F(" vec2 uv = vec2((float(index) + 0.5) * %f, 0.0);\n", 1.0f / texturePixels); + writer.C(" vec4 outColor = ").SampleTexture2D("pal", "uv").C(";\n"); } // FP only, to suit GL(ES) 2.0 -void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage lang) { - char *p = buffer; - - const char *modFunc = lang == HLSL_D3D9 ? "fmod" : "mod"; - +void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { char lookupMethod[128] = "index.r"; - char offset[128] = ""; - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - const u32 clutBase = gstate.getClutIndexStartPos(); - - const int shift = gstate.getClutIndexShift(); - const int mask = gstate.getClutIndexMask(); + const int shift = config.shift; + const int mask = config.mask; float index_multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. bool formatOK = true; - switch (pixelFormat) { + switch (config.pixelFormat) { case GE_FORMAT_8888: if ((mask & (mask + 1)) == 0) { // If the value has all bits contiguous (bitmask check above), we can mod by it + 1. @@ -196,7 +147,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa if (rgba_shift == 0 && mask == 0xFF) { sprintf(lookupMethod, "index.%c", rgba[shift]); } else { - sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 255.99f / (1 << rgba_shift), mask + 1); + sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1); index_multiplier = 1.0f / 256.0f; // Format was OK if there weren't bits from another component. formatOK = mask <= 255 - (1 << rgba_shift); @@ -214,7 +165,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa index_multiplier = 15.0f / 256.0f; } else { // Let's divide and mod to get the right bits. A common case is shift=0, mask=01. - sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 15.99f / (1 << rgba_shift), mask + 1); + sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1); index_multiplier = 1.0f / 256.0f; formatOK = mask <= 15 - (1 << rgba_shift); } @@ -234,7 +185,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa } else { // We just need to divide the right component by the right value, and then mod against the mask. // A common case is shift=1, mask=0f. - sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1); + sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1); index_multiplier = 1.0f / 256.0f; formatOK = mask <= multipliers[shift] - (1 << rgba_shift); } @@ -254,7 +205,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa index_multiplier = 1.0f / 256.0f; } else { // A isn't possible here. - sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 31.99f / (1 << rgba_shift), mask + 1); + sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1); index_multiplier = 1.0f / 256.0f; formatOK = mask <= 31 - (1 << rgba_shift); } @@ -278,7 +229,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa } float texturePixels = 256.f; - if (clutFormat != GE_CMODE_32BIT_ABGR8888) { + if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) { texturePixels = 512.f; index_multiplier *= 0.5f; } @@ -287,60 +238,51 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa // index_multiplier -= 0.01f / texturePixels; if (!formatOK) { - ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase); + ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", config.pixelFormat, shift, mask, config.startPos); } // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. // Technically, the clutBase should be |'d, not added, but that's hard with floats. - float texel_offset = ((float)clutBase + 0.5f) / texturePixels; + float texel_offset = ((float)config.startPos + 0.5f) / texturePixels; + char offset[128] = ""; sprintf(offset, " + %f", texel_offset); - if (lang == GLSL_1xx) { - if (gl_extensions.IsGLES) { - WRITE(p, "#version 100\n"); - WRITE(p, "precision mediump float;\n"); - } else { - WRITE(p, "#version %d\n", gl_extensions.GLSLVersion()); - if (gl_extensions.VersionGEThan(3, 0, 0)) { - WRITE(p, "#define gl_FragColor fragColor0\n"); - WRITE(p, "out vec4 fragColor0;\n"); - } - } - WRITE(p, "varying vec2 v_texcoord0;\n"); - WRITE(p, "uniform sampler2D tex;\n"); - WRITE(p, "uniform sampler2D pal;\n"); - WRITE(p, "void main() {\n"); - WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); - WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); - WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n"); - WRITE(p, "}\n"); - } else if (lang == HLSL_D3D9) { - WRITE(p, "sampler tex: register(s0);\n"); - WRITE(p, "sampler pal: register(s1);\n"); - WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n"); - WRITE(p, " float4 index = tex2D(tex, v_texcoord0);\n"); - WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); - WRITE(p, " return tex2D(pal, float2(coord, 0.0));\n"); - WRITE(p, "}\n"); - } + writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); + writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); + writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); } -void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) { - switch (language) { +void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) { + ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0); + writer.DeclareSamplers(samplers); + writer.HighPrecisionFloat(); + writer.BeginFSMain(Slice::empty(), varyings); + switch (lang.shaderLanguage) { + case HLSL_D3D9: case GLSL_1xx: - GenerateDepalShaderFloat(buffer, pixelFormat, language); + GenerateDepalShaderFloat(writer, config, lang); break; case GLSL_3xx: case GLSL_VULKAN: case HLSL_D3D11: - GenerateDepalShader300(buffer, pixelFormat, language); + GenerateDepalShader300(writer, config, lang); break; - case HLSL_D3D9: - GenerateDepalShaderFloat(buffer, pixelFormat, language); break; default: - _assert_msg_(false, "Depal shader language not supported: %d", (int)language); + _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); } + writer.EndFSMain("outColor"); +} + +void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang) { + ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0); + writer.BeginVSMain(vsInputs, Slice::empty(), varyings); + writer.C(" v_texcoord = a_texcoord;\n"); + writer.C(" gl_Position = vec4(a_position, 0.0, 1.0);\n"); + if (strlen(lang.viewportYSign)) { + writer.F(" gl_Position.y *= %s1.0;\n", lang.viewportYSign); + } + writer.EndVSMain(varyings); } #undef WRITE diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 8058e40bf7..5ce5ef88a8 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -24,4 +24,13 @@ static const int DEPAL_TEXTURE_OLD_AGE = 120; -void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language); +struct DepalConfig { + int mask; + int shift; + u32 startPos; + GEPaletteFormat clutFormat; + GEBufferFormat pixelFormat; +}; + +void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); +void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index d522353b6d..9dedfd2782 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -35,12 +35,15 @@ static const VaryingDef varyings[1] = { { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, }; +static const SamplerDef samplers[1] = { + { "tex" }, +}; + void GenerateDraw2DFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs) { ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0); - writer.DeclareSampler2D("samp", 0); - writer.DeclareTexture2D("tex", 0); + writer.DeclareSamplers(samplers); writer.BeginFSMain(Slice::empty(), varyings); - writer.C(" vec4 outColor = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n"); + writer.C(" vec4 outColor = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n"); writer.EndFSMain("outColor"); } diff --git a/GPU/Common/ReinterpretFramebuffer.cpp b/GPU/Common/ReinterpretFramebuffer.cpp index aa1cd89a1b..f0066e0313 100644 --- a/GPU/Common/ReinterpretFramebuffer.cpp +++ b/GPU/Common/ReinterpretFramebuffer.cpp @@ -10,6 +10,10 @@ static const VaryingDef varyings[1] = { { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, }; +static const SamplerDef samplers[1] = { + { "tex" } +}; + // TODO: We could possibly have an option to preserve any extra color precision? But gonna start without it. // Requires full size integer math. It would be possible to make a floating point-only version with lots of // modulo and stuff, might do it one day. @@ -22,12 +26,11 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff writer.HighPrecisionFloat(); - writer.DeclareSampler2D("samp", 0); - writer.DeclareTexture2D("tex", 0); + writer.DeclareSamplers(samplers); writer.BeginFSMain(Slice::empty(), varyings); - writer.C(" vec4 val = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n"); + writer.C(" vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n"); switch (from) { case GE_FORMAT_4444: diff --git a/GPU/Common/StencilCommon.cpp b/GPU/Common/StencilCommon.cpp index 489c0f6560..0881771ce3 100644 --- a/GPU/Common/StencilCommon.cpp +++ b/GPU/Common/StencilCommon.cpp @@ -79,12 +79,14 @@ static const VaryingDef varyings[1] = { { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, }; +static const SamplerDef samplers[1] = { + { "tex" }, +}; + void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs) { ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0); writer.HighPrecisionFloat(); - - writer.DeclareSampler2D("samp", 0); - writer.DeclareTexture2D("tex", 0); + writer.DeclareSamplers(samplers); if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) { writer.C("layout (depth_unchanged) out float gl_FragDepth;\n"); @@ -94,7 +96,7 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw: writer.BeginFSMain(uniforms, varyings); - writer.C(" vec4 index = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n"); + writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n"); writer.C(" vec4 outColor = index.aaaa;\n"); // Only care about a. writer.C(" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n"); // Bitwise operations on floats, ugh. diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 8583a18c60..1a5d6b4164 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -14,6 +14,7 @@ #include "GPU/Common/VertexShaderGenerator.h" #include "GPU/Common/ReinterpretFramebuffer.h" #include "GPU/Common/StencilCommon.h" +#include "GPU/Common/DepalettizeShaderCommon.h" #if PPSSPP_PLATFORM(WINDOWS) #include "GPU/D3D11/D3D11Util.h" @@ -272,6 +273,61 @@ bool TestStencilShaders() { return !failed; } +bool TestDepalShaders() { + Draw::Bugs bugs; + + ShaderLanguage languages[] = { +#if PPSSPP_PLATFORM(WINDOWS) + ShaderLanguage::HLSL_D3D9, + ShaderLanguage::HLSL_D3D11, +#endif + ShaderLanguage::GLSL_VULKAN, + ShaderLanguage::GLSL_3xx, + ShaderLanguage::GLSL_1xx, + }; + + char *buffer = new char[65536]; + + bool failed = false; + + for (int k = 0; k < ARRAY_SIZE(languages); k++) { + printf("=== %s ===\n\n", ShaderLanguageToString(languages[k])); + + ShaderLanguageDesc desc(languages[k]); + std::string errorMessage; + + // TODO: Try some different configurations of the fragment shader. + // But first just try one. + DepalConfig config{}; + config.clutFormat = GE_CMODE_16BIT_ABGR4444; + config.shift = 8; + config.startPos = 64; + config.mask = 0xFF; + config.pixelFormat = GE_FORMAT_8888; + + GenerateDepalFs(buffer, config, desc); + if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) { + printf("Error compiling depal shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str()); + failed = true; + return false; + } else { + printf("===\n%s\n===\n", buffer); + } + + GenerateDepalVs(buffer, desc); + if (!TestCompileShader(buffer, languages[k], ShaderStage::Vertex, &errorMessage)) { + printf("Error compiling depal shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str()); + failed = true; + return false; + } else { + printf("===\n%s\n===\n", buffer); + } + } + + delete[] buffer; + return !failed; +} + const ShaderLanguage languages[] = { #if PPSSPP_PLATFORM(WINDOWS) ShaderLanguage::HLSL_D3D9, @@ -427,6 +483,10 @@ bool TestShaderGenerators() { return false; } + if (!TestDepalShaders()) { + return false; + } + if (!TestFragmentShaders()) { return false; }