daedalus/Source/SysGL/HLEGraphics/n64.psh
salvy 0801ac4ed2 [!] Moved W32 audio thread back to main thread (was causing various issues, it would be nice to get it working though)
[!] Fixed issue in pixel shader that was affecting my video card (Thx StrmnNrmn)
[!] Copy n64.psh to build dir (W32)

git-svn-id: https://subversion.assembla.com/svn/Daedalusx64/trunk@1543 42e9bfbe-799a-4a2d-bad1-236e862a387a
2013-04-08 01:43:40 +00:00

225 lines
6.8 KiB
Text

#version 150
// Samplers and per-texture scale factors.
uniform sampler2D uTexture0;
uniform sampler2D uTexture1;
uniform vec2 uTexScale0; // Not used below, but might be needed for 'cheap' bilinear filtering.
uniform vec2 uTexScale1;
// Colour / LOD inputs. None of the texture-fetch helpers in this part of the
// file read these.
uniform vec4 uPrimColour;
uniform vec4 uEnvColour;
uniform float uPrimLODFrac;
// Interpolated inputs and the fragment output.
in vec2 v_st;
in vec4 v_col;
out vec4 fragcol;
// Tile 0 state. Fixed-point annotations are <integer bits>.<fraction bits>.
uniform bvec2 uTileClampEnable0;
uniform ivec2 uTileTL0; // 10.2 fixed point
uniform ivec2 uTileBR0; // 10.2 fixed point
uniform vec2 uTileShift0; // floating point
// NOTE(review): mask() applies its mask/mirror arguments to 10.0 texel
// coords - confirm the 10.5 annotation on the two uniforms below.
uniform ivec2 uTileMask0; // 10.5 fixed point
uniform ivec2 uTileMirror0; // 10.5 fixed point
// Tile 1 state; presumably the same formats as tile 0 - confirm.
uniform bvec2 uTileClampEnable1;
uniform ivec2 uTileTL1;
uniform ivec2 uTileBR1;
uniform vec2 uTileShift1;
uniform ivec2 uTileMask1;
uniform ivec2 uTileMirror1;
// NOTE(review): uFoo is not referenced in the visible part of the file -
// confirm it is still needed.
uniform int uFoo;
// Component-wise integer select: yields b where c is true and a where c is
// false (same selection rule as mix() with a boolean selector).
// The select is done directly on the integers rather than by converting to
// float and back, so the result is exact for every int value (a 32-bit float
// only represents integers exactly up to 2^24).
ivec2 imix(ivec2 a, ivec2 b, bvec2 c)
{
    return ivec2(c.x ? b.x : a.x,
                 c.y ? b.y : a.y);
}
// Scale a coordinate by the tile's shift factor.
// coord: 10.5
// shift_scale: floating-point multiplier applied per component
// return: 10.5
ivec2 shift(ivec2 coord, vec2 shift_scale)
{
    // floor() rounds toward negative infinity, so negative coords behave like
    // the arithmetic right shifts (>>) used elsewhere in this file. A bare
    // float->int conversion drops the fraction (rounds toward zero), which
    // gives a different result for negative coords when the scale is < 1.
    return ivec2(floor(vec2(coord) * shift_scale));
}
// Clamp a UV coord for point sampling and make it relative to the tile origin.
// coord: 10.5
// tile_tl, tile_br: shifted left by 3 before being compared with coord
// clamp_enable: per-axis; where false the coord passes through unclamped
// return: 10.0 (all fractional bits are discarded)
ivec2 clampPoint(ivec2 coord,
                 ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable)
{
    ivec2 lo = tile_tl << 3;
    ivec2 hi = tile_br << 3;
    ivec2 selected = imix(coord, clamp(coord, lo, hi), clamp_enable);

    // Drop 3 fractional bits, offset by the tile origin, then drop the
    // remaining 2 fractional bits.
    ivec2 rel = (selected >> 3) - tile_tl;
    return rel >> 2;
}
// Clamp a UV coord for bilinear sampling and make it relative to the tile origin.
// coord: 10.5
// clamp_enable: per-axis; where false the coord passes through unclamped
// frac (out): the low 5 bits of the tile-relative coord (0.5)
// return: 10.0 texel index (tile-relative coord with the 5 fraction bits removed)
ivec2 clampBilinear(ivec2 coord,
                    ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
                    out ivec2 frac)
{
    ivec2 lo = tile_tl << 3;
    ivec2 hi = tile_br << 3;

    // Fractional bits are kept here; they become the interpolation weights.
    ivec2 rel = imix(coord, clamp(coord, lo, hi), clamp_enable) - lo;

    frac = rel & 0x1f;
    return rel >> 5;
}
// Apply mirroring and masking to a texel coordinate.
// coord: 10.0
// mirror_bits: where (coord & mirror_bits) is non-zero, that component is
//              bitwise-inverted before masking
// mask_bits: AND-ed with the (possibly inverted) coord
// return: 10.0
ivec2 mask(ivec2 coord, ivec2 mirror_bits, ivec2 mask_bits)
{
    bvec2 flip = notEqual(coord & mirror_bits, ivec2(0));
    ivec2 flipped = imix(coord, ~coord, flip);
    return flipped & mask_bits;
}
// Standard four-texel bilinear blend.
// This is higher quality bilinear filter than the n64 hardware used, and probably cheaper.
// col_XY: X selects the column along frac.x, Y the row along frac.y
// frac: interpolation weights in 1/32 steps (0.5 fixed point)
vec4 bilinear(vec4 col_00, vec4 col_01, vec4 col_10, vec4 col_11, ivec2 frac)
{
    vec2 w = vec2(frac) / 32.0;

    // Blend horizontally within each row, then vertically between the rows.
    vec4 row0 = mix(col_00, col_10, w.x);
    vec4 row1 = mix(col_01, col_11, w.x);
    return mix(row0, row1, w.y);
}
// Three-texel (triangular) blend; presumably the N64-style filter, going by
// the name. The 2x2 quad is split along the diagonal frac.x + frac.y == 0x20:
// below it the blend is anchored at col_00, on or above it at col_11, and the
// colour is interpolated along the two edges leaving that corner.
// frac: interpolation weights in 1/32 steps (0.5 fixed point)
// return: blended colour, clamped to [0, 1]
vec4 bilinear_n64(vec4 col_00, vec4 col_01, vec4 col_10, vec4 col_11, ivec2 frac)
{
    bool upper = (frac.x + frac.y) >= 0x20;
    bvec4 sel = bvec4(upper);

    vec4 corner  = mix(col_00, col_11, sel);
    vec4 along_x = mix(col_10, col_01, sel);
    vec4 along_y = mix(col_01, col_10, sel);

    // In the upper triangle the weights are measured back from col_11.
    vec2 w = vec2(imix(frac, 0x20 - frac, bvec2(upper))) / 32.0;

    vec4 col = corner + (w.x * (along_x - corner)) + (w.y * (along_y - corner));
    return clamp(col, 0.0, 1.0);
}
// Bilinear sample: shift, clamp, mask/mirror, then blend the 2x2 texel quad.
// st_in is converted to int and treated as a 10.5 coord.
// tex_scale is accepted so all fetch* functions share a signature; it is not
// used here.
vec4 fetchBilinear(vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
                   ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
                   sampler2D tex, vec2 tex_scale)
{
    ivec2 frac;
    ivec2 shifted = shift(ivec2(st_in), shift_scale);
    ivec2 base = clampBilinear(shifted, tile_tl, tile_br, clamp_enable, /*out */frac);

    // The neighbour is taken before masking so that each corner is
    // mirrored/masked independently.
    ivec2 p0 = mask(base, mirror_bits, mask_bits);
    ivec2 p1 = mask(base + ivec2(1), mirror_bits, mask_bits);

    vec4 col_00 = texelFetch(tex, p0, 0);
    vec4 col_01 = texelFetch(tex, ivec2(p0.x, p1.y), 0);
    vec4 col_10 = texelFetch(tex, ivec2(p1.x, p0.y), 0);
    vec4 col_11 = texelFetch(tex, p1, 0);

    return bilinear(col_00, col_01, col_10, col_11, frac);
}
// Bilinear sample that can suppress interpolation on an axis whose second
// texel coordinate ended up below the first after masking.
// bilerp_wrap_enable: per-axis bitmask AND-ed into frac on such an axis
//                     (0 zeroes the fractional bits, -1 leaves them intact).
// tex_scale is not used here.
vec4 fetchBilinearClampedCommon(
    vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
    ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
    sampler2D tex, vec2 tex_scale, ivec2 bilerp_wrap_enable)
{
    ivec2 frac;
    ivec2 shifted = shift(ivec2(st_in), shift_scale);
    ivec2 base = clampBilinear(shifted, tile_tl, tile_br, clamp_enable, /*out */frac);

    ivec2 p0 = mask(base, mirror_bits, mask_bits);
    ivec2 p1 = mask(base + ivec2(1), mirror_bits, mask_bits);

    // p1 < p0 means the neighbouring texel wrapped; apply the bitmask to the
    // fractional bits on that axis only.
    bvec2 wrapped = lessThan(p1, p0);
    frac = imix(frac, frac & bilerp_wrap_enable, wrapped);

    vec4 col_00 = texelFetch(tex, p0, 0);
    vec4 col_01 = texelFetch(tex, ivec2(p0.x, p1.y), 0);
    vec4 col_10 = texelFetch(tex, ivec2(p1.x, p0.y), 0);
    vec4 col_11 = texelFetch(tex, p1, 0);

    return bilinear(col_00, col_01, col_10, col_11, frac);
}
// Bilinear sample that zeroes the x (S) fraction when the x neighbour has
// wrapped; the y (T) fraction is always kept.
vec4 fetchBilinearClampedS(
    vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
    ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
    sampler2D tex, vec2 tex_scale)
{
    // 0 clears the fraction on a wrapped axis, -1 (all bits set) keeps it.
    ivec2 wrap_bits = ivec2(0, -1);
    return fetchBilinearClampedCommon(
        st_in, shift_scale, mirror_bits, mask_bits,
        tile_tl, tile_br, clamp_enable, tex, tex_scale, wrap_bits);
}
// Bilinear sample that zeroes the y (T) fraction when the y neighbour has
// wrapped; the x (S) fraction is always kept.
vec4 fetchBilinearClampedT(vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
                           ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
                           sampler2D tex, vec2 tex_scale)
{
    // -1 (all bits set) keeps the fraction, 0 clears it on a wrapped axis.
    ivec2 wrap_bits = ivec2(-1, 0);
    return fetchBilinearClampedCommon(
        st_in, shift_scale, mirror_bits, mask_bits,
        tile_tl, tile_br, clamp_enable, tex, tex_scale, wrap_bits);
}
// Bilinear sample that zeroes the fraction on whichever axis (x and/or y)
// has a wrapped neighbour.
vec4 fetchBilinearClampedST(vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
                            ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
                            sampler2D tex, vec2 tex_scale)
{
    // 0 on both axes: a wrapped axis always loses its fractional bits.
    ivec2 wrap_bits = ivec2(0);
    return fetchBilinearClampedCommon(
        st_in, shift_scale, mirror_bits, mask_bits,
        tile_tl, tile_br, clamp_enable, tex, tex_scale, wrap_bits);
}
// Point sample: shift, clamp, mask/mirror, then fetch a single texel.
// tex_scale is not used here.
vec4 fetchPoint(vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
                ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
                sampler2D tex, vec2 tex_scale)
{
    ivec2 shifted = shift(ivec2(st_in), shift_scale);
    ivec2 texel = clampPoint(shifted, tile_tl, tile_br, clamp_enable);
    return texelFetch(tex, mask(texel, mirror_bits, mask_bits), 0);
}
// For cycle type Copy - there is no clamping.
// tile_br, clamp_enable and tex_scale are not used here.
vec4 fetchCopy(vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
               ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
               sampler2D tex, vec2 tex_scale)
{
    ivec2 shifted = shift(ivec2(st_in), shift_scale);

    // Drop 3 fractional bits, offset by the tile origin, drop the remaining
    // 2 fractional bits, and keep only the low 13 bits of the result.
    ivec2 rel = (shifted >> 3) - tile_tl;
    ivec2 texel = (rel >> 2) & 0x1fff;

    return texelFetch(tex, mask(texel, mirror_bits, mask_bits), 0);
}
// This just uses regular OpenGL texture filtering.
// The shift is applied and the tile origin subtracted, but there is no
// clamp/mask/mirror handling; mirror_bits, mask_bits, tile_br and
// clamp_enable are not used here.
vec4 fetchSimple(vec2 st_in, vec2 shift_scale, ivec2 mirror_bits, ivec2 mask_bits,
                 ivec2 tile_tl, ivec2 tile_br, bvec2 clamp_enable,
                 sampler2D tex, vec2 tex_scale)
{
    ivec2 shifted = shift(ivec2(st_in), shift_scale);
    ivec2 rel = shifted - (tile_tl << 3);

    // Divide out the 5 fractional bits and apply tex_scale to get the
    // coordinate handed to texture().
    vec2 uvf = vec2(rel) * (tex_scale / 32.0);
    return texture(tex, uvf);
}