Optimize and simplify Pixel AA, Average Fill, and Blur Fill presets (#467)

* Simplify pixel AA; Dependents to be refactored
* Finish refactoring pixel_aa itself; Dependent presets TBD
* Update average and border fill
* Tune default blur strengths in blur fill
* Clean up includes
* Minor corrections
2024-06-22 22:31:45 -04:00 · 2023-07-29 23:32:34 +02:00 · 2023-07-29 23:32:34 +02:00 · 6f921ee481
parent 89530c1dec
commit 6f921ee481
9 changed files with 141 additions and 150 deletions
--- a/border/blur_fill.slangp
+++ b/border/blur_fill.slangp
@ -27,8 +27,8 @@ wrap_mode2 = mirrored_repeat
 shader3 = shaders/blur_fill/render_sampling_areas.slang
 filter_linear3 = true
 scale_type3 = source
-scale_x3 = 2.0
-scale_y3 = 2.0
+scale_x3 = 1.1
+scale_y3 = 1.1
 float_framebuffer3 = true
 alias3 = "Tiled"

--- a/border/blur_fill_stronger_blur.slangp
+++ b/border/blur_fill_stronger_blur.slangp
@ -1,7 +1,7 @@
 shaders = 12

 parameters = "SIGMA;BLUR_RADIUS"
-SIGMA = 2.0
+SIGMA = 1.5
 BLUR_RADIUS = 3.0

 shader0 = ../blurs/shaders/kawase/linearize.slang
@ -27,8 +27,8 @@ wrap_mode2 = mirrored_repeat
 shader3 = shaders/blur_fill/render_sampling_areas.slang
 filter_linear3 = true
 scale_type3 = source
-scale_x3 = 1.0
-scale_y3 = 1.0
+scale_x3 = 0.7
+scale_y3 = 0.7
 float_framebuffer3 = true
 alias3 = "Tiled"

--- a/border/blur_fill_weaker_blur.slangp
+++ b/border/blur_fill_weaker_blur.slangp
@ -27,8 +27,8 @@ wrap_mode2 = mirrored_repeat
 shader3 = shaders/blur_fill/render_sampling_areas.slang
 filter_linear3 = true
 scale_type3 = source
-scale_x3 = 3.0
-scale_y3 = 3.0
+scale_x3 = 2.0
+scale_y3 = 2.0
 float_framebuffer3 = true
 alias3 = "Tiled"

--- a/border/shaders/average_fill/compose.slang
+++ b/border/shaders/average_fill/compose.slang
@ -1,7 +1,7 @@
 #version 450

 /*
-    Average fill v1.4 by fishku
+    Average fill v1.5 by fishku
    Copyright (C) 2023
    Public domain license (CC0)

@ -27,6 +27,7 @@
    3 = Smooth angle-based blending

    Changelog:
+    v1.5: Optimize. Update to new Pixel AA version.
    v1.4: Add anti-aliased interpolation for non-integer scaling.
    v1.3: Fix scaling bugs.
    v1.2: Fix scaling bugs.
@ -34,7 +35,10 @@
    v1.0: Initial release.
 */

+// clang-format off
 #include "parameters.slang"
+#include "../../../interpolation/shaders/pixel_aa/shared.slang"
+// clang-format on

 layout(push_constant) uniform Push {
    vec4 InputSize;
@ -69,14 +73,24 @@ global;
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
 layout(location = 0) out vec2 vTexCoord;
+layout(location = 1) out vec2 tx_coord;
+layout(location = 2) out vec2 tx_per_px;
+layout(location = 3) out vec2 tx_to_uv;

 void main() {
    gl_Position = global.MVP * Position;
    vTexCoord = TexCoord;
+    const vec2 scale_o2i = scale_o2i();
+    tx_coord = (vTexCoord - 0.49999) * scale_o2i + get_input_center();
+    tx_per_px = scale_o2i * param.FinalViewportSize.zw;
+    tx_to_uv = param.InputSize.zw;
 }

 #pragma stage fragment
 layout(location = 0) in vec2 vTexCoord;
+layout(location = 1) in vec2 tx_coord;
+layout(location = 2) in vec2 tx_per_px;
+layout(location = 3) in vec2 tx_to_uv;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Input;
 layout(set = 0, binding = 3) uniform sampler2D Top;
@ -122,20 +136,14 @@ vec3 blend_corner(vec3 a,      // The first color to blend
    }
 }

-#include "../../../interpolation/shaders/pixel_aa/shared.slang"
-
 void main() {
-    const vec2 scale_o2i = scale_o2i();
-    const vec2 pixel_coord =
-        (vTexCoord - 0.49999) * scale_o2i + get_input_center();
-
-    if (pixel_coord.x < param.OS_CROP_LEFT) {
+    if (tx_coord.x < param.OS_CROP_LEFT) {
        if (param.EXTEND_H < 0.5) {
            FragColor = vec4(0.0, 0.0, 0.0, 1.0);
            return;
        }
        const vec3 left = textureLod(Left, vec2(0.5), BIG_NUMBER).rgb;
-        if (pixel_coord.y < param.OS_CROP_TOP) {
+        if (tx_coord.y < param.OS_CROP_TOP) {
            if (param.EXTEND_V < 0.5) {
                FragColor = vec4(0.0, 0.0, 0.0, 1.0);
                return;
@ -155,7 +163,7 @@ void main() {
                                  viewport_corner - content_corner),
                     1.0);
            FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA));
-        } else if (pixel_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) {
+        } else if (tx_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) {
            // Left bar
            FragColor = vec4(pow(left, vec3(param.FILL_GAMMA)), 1.0);
        } else {
@ -179,8 +187,8 @@ void main() {
                     1.0);
            FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA));
        }
-    } else if (pixel_coord.x < param.InputSize.x - param.OS_CROP_RIGHT) {
-        if (pixel_coord.y < param.OS_CROP_TOP) {
+    } else if (tx_coord.x < param.InputSize.x - param.OS_CROP_RIGHT) {
+        if (tx_coord.y < param.OS_CROP_TOP) {
            if (param.EXTEND_V < 0.5) {
                FragColor = vec4(0.0, 0.0, 0.0, 1.0);
                return;
@ -188,41 +196,22 @@ void main() {
            // Top bar
            FragColor = vec4(textureLod(Top, vec2(0.5), BIG_NUMBER).rgb, 1.0);
            FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA));
-        } else if (pixel_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) {
+        } else if (tx_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) {
            // Uncropped
            if (param.FORCE_INTEGER_SCALING > 0.5) {
                // Do a perfectly sharp (nearest neighbor) sampling.
-                FragColor = vec4(texture(Input, (floor(pixel_coord) + 0.5) *
-                                                    param.InputSize.zw)
-                                     .rgb,
-                                 1.0);
+                FragColor = vec4(
+                    texture(Input, (floor(tx_coord) + 0.5) * param.InputSize.zw)
+                        .rgb,
+                    1.0);
            } else {
                // Do a sharp anti-aliased interpolation.
                // Do not correct for gamma additionally because the input is
                // already in linear color space.
-                if (param.PIX_AA_SUBPX < 0.5) {
-                    const vec2 tx_size = get_texel_size(pixel_coord) *
-                                         scale_o2i * param.InputSize.zw;
-                    FragColor =
-                        vec4(sample_aa(Input, pixel_coord, param.InputSize.zw,
-                                       false, param.PIX_AA_SHARP, tx_size),
-                             1.0);
-                } else {
-                    for (int i = -1; i < 2; ++i) {
-                        const vec2 subpix_coord =
-                            pixel_coord +
-                            vec2((param.PIX_AA_SUBPX_BGR < 0.5 ? i : -i) / 3.0,
-                                 0.0) *
-                                param.FinalViewportSize.zw * param.InputSize.xy;
-                        const vec2 tx_size = get_texel_size(subpix_coord) *
-                                             scale_o2i * param.InputSize.zw /
-                                             vec2(3.0, 1.0);
-                        FragColor[i + 1] = sample_aa(
-                            Input, subpix_coord, param.InputSize.zw, false,
-                            param.PIX_AA_SHARP, tx_size)[i + 1];
-                    }
-                    FragColor[3] = 1.0;
-                }
+                FragColor = pixel_aa(
+                    Input, tx_per_px, tx_to_uv, tx_coord, param.PIX_AA_SHARP,
+                    /* gamma_correct = */ false, param.PIX_AA_SUBPX > 0.5,
+                    param.PIX_AA_SUBPX_BGR > 0.5);
            }
        } else {
            if (param.EXTEND_V < 0.5) {
@ -240,7 +229,7 @@ void main() {
            return;
        }
        const vec3 right = textureLod(Right, vec2(0.5), BIG_NUMBER).rgb;
-        if (pixel_coord.y < param.OS_CROP_TOP) {
+        if (tx_coord.y < param.OS_CROP_TOP) {
            if (param.EXTEND_V < 0.5) {
                FragColor = vec4(0.0, 0.0, 0.0, 1.0);
                return;
@ -260,7 +249,7 @@ void main() {
                                  viewport_corner - content_corner),
                     1.0);
            FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA));
-        } else if (pixel_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) {
+        } else if (tx_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) {
            // Right bar
            FragColor = vec4(pow(right, vec3(param.FILL_GAMMA)), 1.0);
        } else {
--- a/border/shaders/average_fill/parameters.slang
+++ b/border/shaders/average_fill/parameters.slang
@ -1,7 +1,7 @@
 // See compose.slang for copyright and other information.

 // clang-format off
-#pragma parameter AVERAGE_FILL_SETTINGS "=== Average fill v1.4 settings ===" 0.0 0.0 1.0 1.0
+#pragma parameter AVERAGE_FILL_SETTINGS "=== Average fill v1.5 settings ===" 0.0 0.0 1.0 1.0
 #pragma parameter OS_CROP_TOP "Overscan crop top" 0.0 0.0 1024.0 1.0
 #pragma parameter OS_CROP_BOTTOM "Overscan crop bottom" 0.0 0.0 1024.0 1.0
 #pragma parameter OS_CROP_LEFT "Overscan crop left" 0.0 0.0 1024.0 1.0
--- a/border/shaders/blur_fill/compose.slang
+++ b/border/shaders/blur_fill/compose.slang
@ -1,7 +1,7 @@
 #version 450

 /*
-    Blur fill v1.5 by fishku
+    Blur fill v1.6 by fishku
    Copyright (C) 2023
    Public domain license (CC0)

@ -27,6 +27,7 @@
    strength of the blur.

    Changelog:
+    v1.6: Optimize. Update to new Pixel AA version. Tune default blur strength.
    v1.5: Add anti-aliased interpolation for non-integer scaling.
    v1.4: Fix scaling bugs.
    v1.3: Reduce shimmering artifacts.
@ -35,8 +36,11 @@
    v1.0: Initial release.
 */

-#include "../../../blurs/shaders/dual_filter/parameters.slang"
+// clang-format off
 #include "parameters.slang"
+#include "../../../blurs/shaders/dual_filter/parameters.slang"
+#include "../../../interpolation/shaders/pixel_aa/shared.slang"
+// clang-format on

 layout(push_constant) uniform Push {
    vec4 InputSize;
@ -71,31 +75,37 @@ global;
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
 layout(location = 0) out vec2 vTexCoord;
+layout(location = 1) out vec2 tx_coord;
+layout(location = 2) out vec2 tx_per_px;
+layout(location = 3) out vec2 tx_to_uv;
+layout(location = 4) out vec4 input_extrema;

 void main() {
    gl_Position = global.MVP * Position;
    vTexCoord = TexCoord;
+    const vec2 scale_o2i = scale_o2i();
+    tx_coord = (vTexCoord - 0.49999) * scale_o2i + get_input_center();
+    tx_per_px = scale_o2i * param.FinalViewportSize.zw;
+    tx_to_uv = param.InputSize.zw;
+    input_extrema = vec4(param.OS_CROP_LEFT, param.OS_CROP_TOP,
+                         param.InputSize.x - param.OS_CROP_RIGHT,
+                         param.InputSize.y - param.OS_CROP_BOTTOM);
 }

 #pragma stage fragment
 layout(location = 0) in vec2 vTexCoord;
+layout(location = 1) in vec2 tx_coord;
+layout(location = 2) in vec2 tx_per_px;
+layout(location = 3) in vec2 tx_to_uv;
+layout(location = 4) in vec4 input_extrema;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Input;
 layout(set = 0, binding = 3) uniform sampler2D Tiled;
 layout(set = 0, binding = 4) uniform sampler2D Blurred;

-#include "../../../interpolation/shaders/pixel_aa/shared.slang"
-
 void main() {
-    const vec2 scale_o2i = scale_o2i();
-    const vec2 pixel_coord =
-        (vTexCoord - 0.49999) * scale_o2i + get_input_center();
-
-    const vec4 input_extrema = vec4(param.OS_CROP_LEFT, param.OS_CROP_TOP,
-                                    param.InputSize.x - param.OS_CROP_RIGHT,
-                                    param.InputSize.y - param.OS_CROP_BOTTOM);
-    if (any(lessThan(pixel_coord, input_extrema.xy)) ||
-        any(greaterThanEqual(pixel_coord, input_extrema.zw))) {
+    if (any(lessThan(tx_coord, input_extrema.xy)) ||
+        any(greaterThanEqual(tx_coord, input_extrema.zw))) {
        if (param.BLUR_RADIUS > 0.0) {
            // Sample blur.
            FragColor = vec4(
@ -117,36 +127,17 @@ void main() {
        if (param.FORCE_INTEGER_SCALING > 0.5) {
            // Do a perfectly sharp (nearest neighbor) sampling.
            FragColor = vec4(
-                texture(Input, (floor(pixel_coord) + 0.5) * param.InputSize.zw)
+                texture(Input, (floor(tx_coord) + 0.5) * param.InputSize.zw)
                    .rgb,
                1.0);
        } else {
            // Do a sharp anti-aliased interpolation.
            // Do not correct for gamma additionally because the input is
            // already in linear color space.
-            if (param.PIX_AA_SUBPX < 0.5) {
-                const vec2 tx_size = get_texel_size(pixel_coord) * scale_o2i *
-                                     param.InputSize.zw;
-                FragColor =
-                    vec4(sample_aa(Input, pixel_coord, param.InputSize.zw,
-                                   false, param.PIX_AA_SHARP, tx_size),
-                         1.0);
-            } else {
-                for (int i = -1; i < 2; ++i) {
-                    const vec2 subpix_coord =
-                        pixel_coord +
-                        vec2((param.PIX_AA_SUBPX_BGR < 0.5 ? i : -i) / 3.0,
-                             0.0) *
-                            param.FinalViewportSize.zw * param.InputSize.xy;
-                    const vec2 tx_size = get_texel_size(subpix_coord) *
-                                         scale_o2i * param.InputSize.zw /
-                                         vec2(3.0, 1.0);
-                    FragColor[i + 1] =
-                        sample_aa(Input, subpix_coord, param.InputSize.zw,
-                                  false, param.PIX_AA_SHARP, tx_size)[i + 1];
-                }
-                FragColor[3] = 1.0;
-            }
+            FragColor = pixel_aa(
+                Input, tx_per_px, tx_to_uv, tx_coord, param.PIX_AA_SHARP,
+                /* gamma_correct = */ false, param.PIX_AA_SUBPX > 0.5,
+                param.PIX_AA_SUBPX_BGR > 0.5);
        }
    }
 }
--- a/border/shaders/blur_fill/parameters.slang
+++ b/border/shaders/blur_fill/parameters.slang
@ -1,7 +1,7 @@
 // See compose.slang for copyright and other information.

 // clang-format off
-#pragma parameter BLUR_FILL_SETTINGS "=== Blur fill v1.5 settings ===" 0.0 0.0 1.0 1.0
+#pragma parameter BLUR_FILL_SETTINGS "=== Blur fill v1.6 settings ===" 0.0 0.0 1.0 1.0
 #pragma parameter OS_CROP_TOP "Overscan crop top" 0.0 0.0 1024.0 1.0
 #pragma parameter OS_CROP_BOTTOM "Overscan crop bottom" 0.0 0.0 1024.0 1.0
 #pragma parameter OS_CROP_LEFT "Overscan crop left" 0.0 0.0 1024.0 1.0
--- a/interpolation/shaders/pixel_aa/pixel_aa.slang
+++ b/interpolation/shaders/pixel_aa/pixel_aa.slang
@ -1,7 +1,7 @@
 #version 450

 /*
-    Pixel AA v1.1 by fishku
+    Pixel AA v1.2 by fishku
    Copyright (C) 2023
    Public domain license (CC0)

@ -24,6 +24,8 @@
    subpixel anti-aliasing, results are identical to the "pixellate" shader.

    Changelog:
+    v1.2: Optimize and simplify algorithm. Enable sharpness < 1.0. Fix subpixel
+          sampling bug.
    v1.1: Better subpixel sampling.
    v1.0: Initial release.
 */
@ -44,44 +46,29 @@ global;
 #pragma stage vertex
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
-layout(location = 0) out vec2 vTexCoord;
-layout(location = 1) out vec2 pix_coord;
+layout(location = 0) out vec2 tx_coord;
+layout(location = 1) out vec2 tx_per_px;
+layout(location = 2) out vec2 tx_to_uv;

 void main() {
    gl_Position = global.MVP * Position;
-    vTexCoord = TexCoord;
-    pix_coord = vTexCoord * param.SourceSize.xy;
+    tx_coord = TexCoord * param.SourceSize.xy;
+    tx_per_px = param.SourceSize.xy * param.OutputSize.zw;
+    tx_to_uv = param.SourceSize.zw;
 }

 #pragma stage fragment
-layout(location = 0) in vec2 vTexCoord;
-layout(location = 1) in vec2 pix_coord;
+layout(location = 0) in vec2 tx_coord;
+layout(location = 1) in vec2 tx_per_px;
+layout(location = 2) in vec2 tx_to_uv;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;

 #include "shared.slang"

 void main() {
-    if (param.PIX_AA_SUBPX < 0.5) {
-        FragColor =
-            vec4(sample_aa(Source, pix_coord, param.SourceSize.zw,
-                           param.PIX_AA_GAMMA > 0.5, param.PIX_AA_SHARP),
-                 1.0);
-    } else {
-        // Subpixel sampling: Shift the sampling by 1/3rd of an output pixel,
-        // assuming that the output size is at monitor resolution.
-        for (int i = -1; i < 2; ++i) {
-            const vec2 subpix_coord =
-                pix_coord +
-                vec2((param.PIX_AA_SUBPX_BGR < 0.5 ? i : -i) / 3.0, 0.0) *
-                    param.OutputSize.zw * param.SourceSize.xy;
-            // With subpixel sampling, the sampling area is effectively reduced
-            // to a third.
-            const vec2 tx_size = get_texel_size(subpix_coord) / vec2(3.0, 1.0);
-            FragColor[i + 1] = sample_aa(
-                Source, subpix_coord, param.SourceSize.zw,
-                param.PIX_AA_GAMMA > 0.5, param.PIX_AA_SHARP, tx_size)[i + 1];
-        }
-        FragColor[3] = 1.0;
-    }
+    FragColor =
+        pixel_aa(Source, tx_per_px, tx_to_uv, tx_coord, param.PIX_AA_SHARP,
+                 param.PIX_AA_GAMMA > 0.5, param.PIX_AA_SUBPX > 0.5,
+                 param.PIX_AA_SUBPX_BGR > 0.5);
 }
--- a/interpolation/shaders/pixel_aa/shared.slang
+++ b/interpolation/shaders/pixel_aa/shared.slang
@ -1,9 +1,8 @@
-// This file has to be included at the frag shader stage so that fwidth() is
-// defined.
+// See pixel_aa.slang for copyright and other information.

 // clang-format off
-#pragma parameter PIX_AA_SETTINGS "=== Pixel AA v1.1 settings ===" 0.0 0.0 1.0 1.0
-#pragma parameter PIX_AA_SHARP "Pixel AA sharpening amount" 1.0 1.0 4.0 0.05
+#pragma parameter PIX_AA_SETTINGS "=== Pixel AA v1.2 settings ===" 0.0 0.0 1.0 1.0
+#pragma parameter PIX_AA_SHARP "Pixel AA sharpening amount" 1.5 0.0 2.0 0.05
 #pragma parameter PIX_AA_GAMMA "Enable gamma-correct blending" 1.0 0.0 1.0 1.0
 #pragma parameter PIX_AA_SUBPX "Enable subpixel AA" 0.0 0.0 1.0 1.0
 #pragma parameter PIX_AA_SUBPX_BGR "Use BGR subpx. instead of RGB" 0.0 0.0 1.0 1.0
@ -25,40 +24,65 @@ vec3 to_lin(vec3 x) { return pow(x, vec3(2.2)); }

 vec3 to_srgb(vec3 x) { return pow(x, vec3(1.0 / 2.2)); }

-vec2 get_texel_size(vec2 pix_coord) {
-    return clamp(fwidth(pix_coord), 1.0e-5, 1.0);
-}
-
+// Function to get a single sample using the "pixel AA" method.
 // Params:
-// pix_coord: Coordinate in source pixel coordinates
-// px_size_uv: 1 / source resolution
-vec3 sample_aa(sampler2D tex, vec2 pix_coord, vec2 px_size_uv,
-               bool gamma_correct, float sharpness, vec2 tx_size) {
-    const vec2 tx_coord = pix_coord - 0.5 * tx_size;
-    const vec2 tx_coord_i = floor(tx_coord);
-    const vec2 tx_offset =
-        slopestep(1.0 - tx_size, vec2(1.0), fract(tx_coord), sharpness);
-    // With gamma correct blending, we have to do 4 taps and blend manually.
-    // Without it, we can make use of a single tap using bilinear interpolation.
+// tx_coord: Coordinate in source pixel (texel) coordinates
+vec3 sample_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord,
+               float sharpness, bool gamma_correct) {
+    // The offset for interpolation is a periodic function with
+    // a period length of 1 texel.
+    // The input coordinate is shifted so that the center of the texel
+    // aligns with the start of the period.
+    // First, get the period and phase.
+    vec2 period;
+    const vec2 phase = modf(tx_coord - 0.5, period);
+    // The function starts at 0, then starts transitioning at
+    // 0.5 - 0.5 / pixels_per_texel, then reaches 0.5 at 0.5,
+    // Then reaches 1 at 0.5 + 0.5 / pixels_per_texel.
+    // For sharpness values < 1.0, blend to bilinear filtering.
+    const vec2 offset =
+        slopestep(min(1.0, sharpness) * (0.5 - 0.5 * tx_per_px),
+                  1.0 - min(1.0, sharpness) * (1.0 - (0.5 + 0.5 * tx_per_px)),
+                  phase, max(1.0, sharpness));
+
+    // With gamma correct blending, we have to do 4 taps and interpolate
+    // manually. Without it, we can make use of a single tap using bilinear
+    // interpolation. The offsets are shifted back to the texel center before
+    // sampling.
    if (gamma_correct) {
        const vec3 samples[] = {
-            to_lin(texture(tex, (tx_coord_i + 0.5) * px_size_uv).rgb),
-            to_lin(
-                texture(tex, (tx_coord_i + vec2(1.5, 0.5)) * px_size_uv).rgb),
-            to_lin(
-                texture(tex, (tx_coord_i + vec2(0.5, 1.5)) * px_size_uv).rgb),
-            to_lin(texture(tex, (tx_coord_i + 1.5) * px_size_uv).rgb)};
-        return to_srgb(mix(mix(samples[0], samples[1], tx_offset.x),
-                           mix(samples[2], samples[3], tx_offset.x),
-                           tx_offset.y));
+            to_lin(texture(tex, (period + 0.5) * tx_to_uv).rgb),
+            to_lin(texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).rgb),
+            to_lin(texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).rgb),
+            to_lin(texture(tex, (period + 1.5) * tx_to_uv).rgb)};
+        return to_srgb(mix(mix(samples[0], samples[1], offset.x),
+                           mix(samples[2], samples[3], offset.x), offset.y));
    } else {
-        return texture(tex, (tx_coord_i + 0.5 + tx_offset) * px_size_uv).rgb;
+        return texture(tex, (period + 0.5 + offset) * tx_to_uv).rgb;
    }
 }

-vec3 sample_aa(sampler2D tex, vec2 pix_coord, vec2 px_size_uv,
-               bool gamma_correct, float sharpness) {
-    const vec2 tx_size = get_texel_size(pix_coord);
-    return sample_aa(tex, pix_coord, px_size_uv, gamma_correct, sharpness,
-                     tx_size);
+// Function to get a pixel value, taking into consideration possible subpixel
+// interpolation.
+vec4 pixel_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord,
+              float sharpness, bool gamma_correct, bool sample_subpx,
+              bool subpx_bgr) {
+    if (sample_subpx) {
+        // Subpixel sampling: Shift the sampling by 1/3rd of an output pixel for
+        // each subpixel, assuming that the output size is at monitor
+        // resolution.
+        const vec2 sub_tx_offset =
+            vec2(tx_per_px.x / 3.0 * (subpx_bgr ? -1.0 : 1.0), 0.0);
+        vec3 res;
+        for (int i = -1; i < 2; ++i) {
+            res[i + 1] = sample_aa(tex, tx_per_px, tx_to_uv,
+                                   tx_coord + sub_tx_offset * float(i),
+                                   sharpness, gamma_correct)[i + 1];
+        }
+        return vec4(res, 1.0);
+    } else {
+        return vec4(sample_aa(tex, tx_per_px, tx_to_uv, tx_coord, sharpness,
+                              gamma_correct),
+                    1.0);
+    }
 }