Implement shader depal for GL as well, but disabled by default.

This commit is contained in:
Henrik Rydgård 2018-04-13 20:00:14 +02:00
parent 0479255f76
commit fb7a63bd11
6 changed files with 150 additions and 7 deletions

View file

@ -157,6 +157,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);
bool doFlatShading = id.Bit(FS_BIT_FLATSHADE);
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL);
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
@ -217,6 +218,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
}
}
if (shaderDepal) {
WRITE(p, "uniform sampler2D pal;\n");
WRITE(p, "uniform int u_depal;\n");
*uniformMask |= DIRTY_DEPAL;
}
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
*uniformMask |= DIRTY_STENCILREPLACEVALUE;
@ -336,10 +343,95 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
if (doTextureProjection) {
WRITE(p, " vec4 t = %sProj(tex, %s);\n", texture, texcoord);
if (shaderDepal) {
WRITE(p, " vec4 t1 = %sProjOffset(tex, %s, ivec2(1, 0));\n", texture, texcoord);
WRITE(p, " vec4 t2 = %sProjOffset(tex, %s, ivec2(0, 1));\n", texture, texcoord);
WRITE(p, " vec4 t3 = %sProjOffset(tex, %s, ivec2(1, 1));\n", texture, texcoord);
}
} else {
WRITE(p, " vec4 t = %s(tex, %s.xy);\n", texture, texcoord);
if (shaderDepal) {
WRITE(p, " vec4 t1 = %sOffset(tex, %s.xy, ivec2(1, 0));\n", texture, texcoord);
WRITE(p, " vec4 t2 = %sOffset(tex, %s.xy, ivec2(0, 1));\n", texture, texcoord);
WRITE(p, " vec4 t3 = %sOffset(tex, %s.xy, ivec2(1, 1));\n", texture, texcoord);
}
}
WRITE(p, " vec4 p = v_color0;\n");
if (shaderDepal) {
WRITE(p, " int depalMask = (u_depal & 0xFF);\n");
WRITE(p, " int depalShift = ((u_depal >> 8) & 0xFF);\n");
WRITE(p, " int depalOffset = (((u_depal >> 16) & 0xFF) << 4);\n");
WRITE(p, " int depalFmt = ((u_depal >> 24) & 0x3);\n");
WRITE(p, " bool bilinear = (u_depal >> 31) != 0;\n");
WRITE(p, " vec2 fraction = fract(%s.xy);\n", texcoord);
WRITE(p, " ivec4 col; int index0; int index1; int index2; int index3;\n");
WRITE(p, " switch (depalFmt) {\n"); // We might want to include fmt in the shader ID if this is a performance issue.
WRITE(p, " case 0:\n"); // 565
WRITE(p, " col = ivec4(t.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
WRITE(p, " index0 = (col.b << 11) | (col.g << 5) | (col.r);\n");
WRITE(p, " if (bilinear) {\n");
WRITE(p, " col = ivec4(t1.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
WRITE(p, " index1 = (col.b << 11) | (col.g << 5) | (col.r);\n");
WRITE(p, " col = ivec4(t2.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
WRITE(p, " index2 = (col.b << 11) | (col.g << 5) | (col.r);\n");
WRITE(p, " col = ivec4(t3.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
WRITE(p, " index3 = (col.b << 11) | (col.g << 5) | (col.r);\n");
WRITE(p, " }\n");
WRITE(p, " break;\n");
WRITE(p, " case 1:\n"); // 5551
WRITE(p, " col = ivec4(t.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
WRITE(p, " index0 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
WRITE(p, " if (bilinear) {\n");
WRITE(p, " col = ivec4(t1.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
WRITE(p, " index1 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
WRITE(p, " col = ivec4(t2.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
WRITE(p, " index2 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
WRITE(p, " col = ivec4(t3.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
WRITE(p, " index3 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
WRITE(p, " }\n");
WRITE(p, " break;\n");
WRITE(p, " case 2:\n"); // 4444
WRITE(p, " col = ivec4(t.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
WRITE(p, " index0 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
WRITE(p, " if (bilinear) {\n");
WRITE(p, " col = ivec4(t1.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
WRITE(p, " index1 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
WRITE(p, " col = ivec4(t2.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
WRITE(p, " index2 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
WRITE(p, " col = ivec4(t3.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
WRITE(p, " index3 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
WRITE(p, " }\n");
WRITE(p, " break;\n");
WRITE(p, " case 3:\n"); // 8888
WRITE(p, " col = ivec4(t.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
WRITE(p, " index0 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
WRITE(p, " if (bilinear) {\n");
WRITE(p, " col = ivec4(t1.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
WRITE(p, " index1 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
WRITE(p, " col = ivec4(t2.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
WRITE(p, " index2 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
WRITE(p, " col = ivec4(t3.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
WRITE(p, " index3 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
WRITE(p, " }\n");
WRITE(p, " break;\n");
WRITE(p, " };\n");
WRITE(p, " index0 = ((index0 >> depalShift) & depalMask) | depalOffset;\n");
WRITE(p, " t = texelFetch(pal, ivec2(index0, 0), 0);\n");
WRITE(p, " if (bilinear) {\n");
WRITE(p, " index1 = ((index1 >> depalShift) & depalMask) | depalOffset;\n");
WRITE(p, " index2 = ((index2 >> depalShift) & depalMask) | depalOffset;\n");
WRITE(p, " index3 = ((index3 >> depalShift) & depalMask) | depalOffset;\n");
WRITE(p, " t1 = texelFetch(pal, ivec2(index1, 0), 0);\n");
WRITE(p, " t2 = texelFetch(pal, ivec2(index2, 0), 0);\n");
WRITE(p, " t3 = texelFetch(pal, ivec2(index3, 0), 0);\n");
WRITE(p, " t = mix(t, t1, fraction.x);\n");
WRITE(p, " t2 = mix(t2, t3, fraction.x);\n");
WRITE(p, " t = mix(t, t2, fraction.y);\n");
WRITE(p, " }\n");
}
if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha)
WRITE(p, " vec4 p = v_color0;\n");
if (doTextureAlpha) { // texfmt == RGBA
switch (texFunc) {

View file

@ -103,6 +103,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_blendFixA, "u_blendFixA" });
queries.push_back({ &u_blendFixB, "u_blendFixB" });
queries.push_back({ &u_fbotexSize, "u_fbotexSize" });
queries.push_back({ &u_pal, "pal" });
// Transform
queries.push_back({ &u_view, "u_view" });
@ -161,6 +162,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_spline_count_v, "u_spline_count_v" });
queries.push_back({ &u_spline_type_u, "u_spline_type_u" });
queries.push_back({ &u_spline_type_v, "u_spline_type_v" });
queries.push_back({ &u_depal, "u_depal" });
attrMask = vs->GetAttrMask();
availableUniforms = vs->GetUniformMask() | fs->GetUniformMask();
@ -169,6 +171,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
initialize.push_back({ &u_tex, 0, 0 });
initialize.push_back({ &u_fbotex, 0, 1 });
initialize.push_back({ &u_testtex, 0, 2 });
initialize.push_back({ &u_pal, 0, 3 }); // CLUT
initialize.push_back({ &u_tess_pos_tex, 0, 4 }); // Texture unit 4
initialize.push_back({ &u_tess_tex_tex, 0, 5 }); // Texture unit 5
initialize.push_back({ &u_tess_col_tex, 0, 6 }); // Texture unit 6
@ -283,6 +286,17 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
if (!dirty)
return;
if (dirty & DIRTY_DEPAL) {
int indexMask = gstate.getClutIndexMask();
int indexShift = gstate.getClutIndexShift();
int indexOffset = gstate.getClutIndexStartPos() >> 4;
int format = gstate_c.depalFramebufferFormat;
uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
// Poke in a bilinear filter flag in the top bit.
val |= gstate.isMagnifyFilteringEnabled() << 31;
render_->SetUniformI1(&u_depal, val);
}
// Update any dirty uniforms before we draw
if (dirty & DIRTY_PROJMATRIX) {
Matrix4x4 flippedMatrix;
@ -810,7 +824,7 @@ std::string ShaderManagerGLES::DebugGetShaderString(std::string id, DebugShaderT
// as sometimes these features might have an effect on the ID bits.
#define CACHE_HEADER_MAGIC 0x83277592
#define CACHE_VERSION 11
#define CACHE_VERSION 12
struct CacheHeader {
uint32_t magic;
uint32_t version;

View file

@ -85,6 +85,10 @@ public:
int u_blendFixB;
int u_fbotexSize;
// Shader depal
int u_pal; // the texture
int u_depal; // the params
// Fragment processing inputs
int u_alphacolorref;
int u_alphacolormask;

View file

@ -161,7 +161,7 @@ void TextureCacheGLES::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
render_->SetTextureSampler(0, sClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT, tClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT, MagFiltGL[magFilt], MinFiltGL[minFilt], aniso);
}
void TextureCacheGLES::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
void TextureCacheGLES::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight, bool forcePoint) {
int minFilt;
int magFilt;
bool sClamp;
@ -171,6 +171,10 @@ void TextureCacheGLES::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferH
GetSamplingParams(minFilt, magFilt, sClamp, tClamp, lodBias, 0, 0, mode);
minFilt &= 1; // framebuffers can't mipmap.
if (forcePoint) {
minFilt &= ~1;
magFilt &= ~1;
}
// Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case.
// This happens whether we have OES_texture_npot or not.
@ -324,6 +328,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) {
lastBoundTexture = entry->textureName;
}
UpdateSamplingParams(*entry, false);
gstate_c.useShaderDepal = false;
}
void TextureCacheGLES::Unbind() {
@ -434,7 +439,33 @@ protected:
void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) {
DepalShader *depal = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
#if 0
bool useShaderDepal = gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300);
#else
bool useShaderDepal = false;
#endif
if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) {
if (useShaderDepal) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_);
render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, true);
InvalidateLastTexture();
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_DEPAL);
gstate_c.useShaderDepal = true;
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
return;
}
depal = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat);
}
if (depal) {
@ -472,7 +503,7 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram
}
framebufferManagerGL_->RebindFramebuffer();
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, false);
InvalidateLastTexture();

View file

@ -63,7 +63,7 @@ public:
}
}
void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);
void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight, bool forcePoint);
bool GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) override;
void DeviceLost();

View file

@ -4,6 +4,7 @@
#include "GLRenderManager.h"
#include "DataFormatGL.h"
#include "base/logging.h"
#include "base/stringutil.h"
#include "gfx/gl_common.h"
#include "gfx/gl_debug_log.h"
#include "gfx_es2/gpu_features.h"
@ -156,9 +157,10 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps) {
#ifdef _WIN32
OutputDebugStringUTF8(buf);
OutputDebugStringUTF8(vsCode);
if (vsCode)
OutputDebugStringUTF8(LineNumberString(vsCode).c_str());
if (fsCode)
OutputDebugStringUTF8(fsCode);
OutputDebugStringUTF8(LineNumberString(fsCode).c_str());
#endif
delete[] buf;
} else {