Add ray traced curvature shader (#604)

* Ray-sphere intersection * Implement spherical mapping with AA; Found mip-mapping bug * Implement screen tilt * Implement camera offset with tilt * Clean up code a bit * Implement perfect zoom; tweak params * Add zero curvature * Implement rounded corner * Add aspect ratio forcing; Add bilinear filtering compile time switch * Make rounded corner AA more generous * Implement cylinder shape * Implement ortho view * Implement proper trilinear filtering * Add LOD bias option * Better sampling * Initial cleanup * Apply simplification that curvature_o = 0 * Compress code; Replace shape branching with multiplication * Move stuff to the vert shader * Inline update_frustum; Simplify cyl_ax = plane_v * Separate out vertex shader * Separate out ray tracing part of frag shader * Make hq preset default * Rename to rt curvature; Fix curv = 0 bug * Fix rotated games; Finalize simplifications and comments * Move to CRT folder; Add append preset * Longer comment * Add additional runtime switch for reducing aliasing when appending
2024-06-22 14:21:47 -04:00 · 2024-06-15 00:48:23 +02:00 · 2024-06-15 00:48:23 +02:00 · 693c632bdd
parent e5b20b1422
commit 693c632bdd
7 changed files with 379 additions and 39 deletions
--- a/crt/ray_traced_curvature.slangp
+++ b/crt/ray_traced_curvature.slangp
@ -0,0 +1,10 @@
+shaders = 2
+
+shader0 = ../stock.slang
+filter_linear0 = false
+scale_type0 = source
+
+shader1 = shaders/rt_curvature/rt_curvature.slang
+filter_linear1 = true
+scale_type1 = viewport
+mipmap_input1 = true
--- a/crt/ray_traced_curvature_append.slangp
+++ b/crt/ray_traced_curvature_append.slangp
@ -0,0 +1,9 @@
+shaders = 1
+
+shader0 = shaders/rt_curvature/rt_curvature.slang
+filter_linear0 = true
+scale_type0 = viewport
+mipmap_input0 = true
+
+parameters = "RT_CURV_APPEND"
+RT_CURV_APPEND = 1.0
--- a/crt/shaders/rt_curvature/parameters.inc
+++ b/crt/shaders/rt_curvature/parameters.inc
@ -0,0 +1,21 @@
+// See the main shader file for copyright and other information.
+
+// clang-format off
+#pragma parameter RT_CURV_SETTINGS "=== Ray Traced Curvature v1.0 settings ===" 0.0 0.0 1.0 1.0
+
+#pragma parameter RT_CURV_CURVATURE "Curvature strength" 0.5 0.0 1.5 0.05
+#pragma parameter RT_CURV_TILT_ANGLE_H "Horizontal tilt" 0.0 -0.5 0.5 0.01
+#pragma parameter RT_CURV_TILT_ANGLE_V "Vertical tilt" -0.08 -0.5 0.5 0.01
+#pragma parameter RT_CURV_ROUNDED_CORNER "Rounded corner radius" 0.02 0.0 0.2 0.01
+#pragma parameter RT_CURV_SHAPE "Screen shape (0 = sphere, 1 = cylinder)" 0.0 0.0 1.0 1.0
+
+#pragma parameter RT_CURV_ASPECT_H "Horizontal aspect ratio (0 = unchanged)" 0.0 0.0 256.0 1.0
+#pragma parameter RT_CURV_ASPECT_V "Vertical aspect ratio (0 = unchanged)" 0.0 0.0 256.0 1.0
+
+#pragma parameter RT_CURV_ZOOM "Zoom" 0.99 0.8 1.2 0.01
+#define RT_CURV_F_MAX 10.0
+#pragma parameter RT_CURV_F "Focal Length (max = ortho)" 2.0 0.5 10.0 0.25
+
+#pragma parameter RT_CURV_LOD_BIAS "Anti-aliasing boost" 0.0 0.0 1.0 0.05
+#pragma parameter RT_CURV_APPEND "Disable pixel art sampling (for append)" 0.0 0.0 1.0 1.0
+// clang-format on
--- a/crt/shaders/rt_curvature/rt_curvature.slang
+++ b/crt/shaders/rt_curvature/rt_curvature.slang
@ -0,0 +1,175 @@
+#version 450
+
+/*
+    Ray traced curvature v1.0 by fishku
+    Copyright (C) 2024
+    Public domain license (CC0)
+
+    This example demonstrates the following:
+    - How to generate rays for tracing against the screen surface in the vertex
+      shader.
+    - How to trace the rays in the fragment shader.
+    - How to achieve high-quality sampling with the found UV.
+
+    Changelog:
+    v1.0: Initial release.
+*/
+
+// If uncommented, pixel art is sampled sharply, better for stand-alone usage.
+// If commented out, no coordinate distortion is done, which is better for
+// appending. Enabling this option may give stronger aliasing artifacts when
+// appending to other presets.
+#define SHARP_PIXEL_SAMPLING
+
+// If uncommented, mip-map levels are blended, if available, giving better AA.
+#define TRILINEAR_SAMPLING
+
+#include "../../../misc/shaders/input_transform/rotation.inc"
+#include "parameters.inc"
+#include "shared.inc"
+
+layout(push_constant) uniform Push {
+    vec4 OriginalSize;  // .xy is the fallback source for the input aspect.
+    vec4 SourceSize;    // .xy scales UV to texels; .zw scales texels to UV.
+    vec4 OutputSize;    // .xy gives output aspect; .zw sets corner AA width.
+    uint Rotation;      // Fed to get_rotated_vector() / get_rotated_size().
+    float RT_CURV_ASPECT_H;  // Forced aspect, used only if H and V are > 0.
+    float RT_CURV_ASPECT_V;
+    float RT_CURV_SHAPE;      // 0 = sphere, 1 = cylinder (see parameters.inc).
+    float RT_CURV_CURVATURE;  // Values <= 1.0e-3 are treated as flat.
+    float RT_CURV_ZOOM;
+    float RT_CURV_TILT_ANGLE_H;
+    float RT_CURV_TILT_ANGLE_V;
+    float RT_CURV_F;  // Focal length; >= RT_CURV_F_MAX selects ortho view.
+    float RT_CURV_ROUNDED_CORNER;
+    float RT_CURV_LOD_BIAS;  // Added to the mip level used for sampling.
+    float RT_CURV_APPEND;    // >= 0.5 skips the sharp pixel art sampling.
+}
+param;
+
+layout(std140, set = 0, binding = 0) uniform UBO { mat4 MVP; }
+global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec3 prim_ray_o;
+layout(location = 1) out vec3 prim_ray_d;
+layout(location = 2) out vec3 plane_n;
+layout(location = 3) out vec3 plane_u;
+layout(location = 4) out vec3 plane_v;
+layout(location = 5) out vec2 input_aspect;
+
+void main() {
+    gl_Position = global.MVP * Position;
+
+    // Radius of the curved screen object (sphere or cylinder).
+    // Simplification: the curved object is assumed to be centered at the
+    // origin. With (near-)zero curvature the radius falls back to 1.
+    float radius = 1.0;
+    if (param.RT_CURV_CURVATURE > 1.0e-3) {
+        radius = 1.0 / param.RT_CURV_CURVATURE;
+    }
+
+    // Rotate the user tilt according to param.Rotation.
+    // TODO: Why does this need an extra flip compared to the usage in
+    // input_transform.inc?
+    const vec2 user_tilt =
+        vec2(param.RT_CURV_TILT_ANGLE_H, param.RT_CURV_TILT_ANGLE_V);
+    const int odd_rotation_flip = 1 - 2 * (int(param.Rotation) % 2);
+    const vec2 tilt =
+        get_rotated_vector(user_tilt, param.Rotation) * odd_rotation_flip;
+    const vec2 s = sin(tilt);
+    const vec2 c = cos(tilt);
+
+    // Input texture plane. Before tilting, its basis is:
+    //   n = (0, 0, -radius), u = (1, 0, 0), v = (0, 1, 0)
+    // where n doubles as the origin of the plane. The basis is then rotated
+    // around x by tilt.x and around y by tilt.y.
+    // Simplification: the cylinder axis is assumed to equal plane_v.
+    plane_n = vec3(radius * s.x * c.y, -radius * s.y, -radius * c.x * c.y);
+    plane_u = vec3(c.x, 0.0, s.x);
+    plane_v = vec3(s.x * s.y, c.y, -c.x * s.y);
+
+    // Compute aspect ratios, normalized so that the longer side is 1.
+    // The forced aspect is only used when both components are positive;
+    // otherwise the aspect is derived from OriginalSize.
+    if (param.RT_CURV_ASPECT_H > 0.0 && param.RT_CURV_ASPECT_V > 0.0) {
+        const vec2 forced_aspect =
+            vec2(param.RT_CURV_ASPECT_H, param.RT_CURV_ASPECT_V) /
+            max(param.RT_CURV_ASPECT_H, param.RT_CURV_ASPECT_V);
+        input_aspect = get_rotated_size(forced_aspect, param.Rotation);
+    } else {
+        input_aspect = param.OriginalSize.xy /
+                       max(param.OriginalSize.x, param.OriginalSize.y);
+    }
+    // Output aspect ratio does not need to be rotated since RA takes care of
+    // output rotation.
+    const vec2 output_aspect =
+        param.OutputSize.xy / max(param.OutputSize.x, param.OutputSize.y);
+
+    // Simplification: the plane origin is assumed to equal plane_n.
+    generate_ray(TexCoord, input_aspect, output_aspect, plane_n, plane_u,
+                 plane_v, param.RT_CURV_CURVATURE, param.RT_CURV_F,
+                 param.RT_CURV_SHAPE, param.RT_CURV_ZOOM, prim_ray_o,
+                 prim_ray_d);
+}
+
+#pragma stage fragment
+layout(location = 0) in vec3 prim_ray_o;
+layout(location = 1) in vec3 prim_ray_d;
+layout(location = 2) in vec3 plane_n;
+layout(location = 3) in vec3 plane_u;
+layout(location = 4) in vec3 plane_v;
+layout(location = 5) in vec2 input_aspect;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main() {
+    // Trace the interpolated camera ray to find the input UV for this pixel.
+    vec2 uv = trace_ray(input_aspect, prim_ray_o, prim_ray_d, plane_n, plane_u,
+                        plane_v, param.RT_CURV_CURVATURE, param.RT_CURV_SHAPE);
+
+    // Rounded corner darkening: signed distance to a rounded rectangle,
+    // faded to black over a band derived from OutputSize.zw.
+    const float corner_radius = param.RT_CURV_ROUNDED_CORNER;
+    const vec2 corner_q = input_aspect * (abs(uv - 0.5) - 0.5) + corner_radius;
+    const float rect_dist = min(max(corner_q.x, corner_q.y), 0.0) +
+                            length(max(corner_q, 0.0)) - corner_radius;
+    const float fade_width = max(param.OutputSize.z, param.OutputSize.w);
+    const float corner_darkening = smoothstep(-fade_width, 0.0, -rect_dist);
+
+    // Sampling section follows.
+#if defined(SHARP_PIXEL_SAMPLING) || defined(TRILINEAR_SAMPLING)
+    // Screen-space derivatives of the UV, scaled by SourceSize.xy. These are
+    // taken before the UV is adjusted below.
+    const vec2 tx_per_dx = dFdx(uv) * param.SourceSize.xy;
+    const vec2 tx_per_dy = dFdy(uv) * param.SourceSize.xy;
+#endif
+
+#ifdef SHARP_PIXEL_SAMPLING
+    // Do a sharp "pixel art" sampling, following:
+    // https://www.youtube.com/watch?v=d6tp43wZqps
+    // Only apply pixel art sampling when not appending to avoid aliasing.
+    if (param.RT_CURV_APPEND < 0.5) {
+        const vec2 footprint =
+            clamp(abs(tx_per_dx) + abs(tx_per_dy), 1.0e-6, 1.0);
+        const vec2 tx_pos = uv * param.SourceSize.xy - 0.5 * footprint;
+        const vec2 tx_blend =
+            smoothstep(1.0 - footprint, vec2(1.0), fract(tx_pos));
+        uv = (floor(tx_pos) + 0.5 + tx_blend) * param.SourceSize.zw;
+    }
+#endif
+
+#ifdef TRILINEAR_SAMPLING
+    // Anisotropic trilinear filtering.
+    // Implement in software because current implementation is broken. See:
+    // https://github.com/libretro/RetroArch/issues/16567
+    const float max_sq_rate =
+        max(dot(tx_per_dx, tx_per_dx), dot(tx_per_dy, tx_per_dy));
+    const float lod =
+        max(0.0, 0.5 * log2(max_sq_rate)) + param.RT_CURV_LOD_BIAS;
+    // Split the level of detail into integer level and blend weight.
+    float lod_floor;
+    const float lod_frac = modf(lod, lod_floor);
+    const vec3 fine_rgb = textureLod(Source, uv, lod_floor).rgb;
+    const vec3 coarse_rgb = textureLod(Source, uv, lod_floor + 1.0).rgb;
+    FragColor = vec4(mix(fine_rgb, coarse_rgb, lod_frac), 1.0);
+#else
+    FragColor = vec4(textureLod(Source, uv, param.RT_CURV_LOD_BIAS).rgb, 1.0);
+#endif
+
+    // Apply rounded corner darkening.
+    FragColor.rgb *= corner_darkening;
+}
--- a/crt/shaders/rt_curvature/shared.inc
+++ b/crt/shaders/rt_curvature/shared.inc
@ -0,0 +1,122 @@
+// See the main shader file for copyright and other information.
+
+// Intersects 2D lines, defined as normal vector (.x and .y) and offset (.z).
+vec2 line_intersection(vec3 l1, vec3 l2) {
+    // Each line satisfies dot(l.xy, p) = l.z; solve the 2x2 system for p
+    // with Cramer's rule.
+    // Simplification: Assume lines are not parallel (non-zero determinant).
+    const float det = l1.x * l2.y - l2.x * l1.y;
+    const float inv_det = 1.0 / det;
+    const vec2 numerators =
+        vec2(l2.y * l1.z - l1.y * l2.z, l1.x * l2.z - l2.x * l1.z);
+    return numerators * inv_det;
+}
+
+void generate_ray(vec2 tex_coord, vec2 input_aspect, vec2 output_aspect,
+                  vec3 plane_o, vec3 plane_u, vec3 plane_v, float curv, float f,
+                  float shape, float zoom, inout vec3 prim_ray_o,
+                  inout vec3 prim_ray_d) {
+    // Writes the primary camera ray (origin and direction) for the given
+    // output texture coordinate into prim_ray_o / prim_ray_d.
+    //
+    // The camera position is derived from 9 points sampled across the frame
+    // (a 3x3 grid on the input plane, projected onto the curved surface if
+    // curvature is enabled). We want the camera position that is as close as
+    // possible to the points, maximizing the points in the frustum view.
+
+    // Frustum boundary lines in the xz plane (entries 0, 1) and the yz plane
+    // (entries 2, 3), stored as normal (.xy) and offset (.z). The offsets
+    // start large and shrink to the minimum over all sampled points.
+    vec3 bounds[4] = vec3[4](vec3(f, 0.5 * output_aspect.x, 1.0e7),
+                             vec3(-f, 0.5 * output_aspect.x, 1.0e7),
+                             vec3(f, 0.5 * output_aspect.y, 1.0e7),
+                             vec3(-f, 0.5 * output_aspect.y, 1.0e7));
+    // Axis-aligned bounding box of the sampled points.
+    vec3 lo = vec3(1.0e7);
+    vec3 hi = vec3(-1.0e7);
+
+    for (int k = 0; k < 9; ++k) {
+        // Grid coordinates in {-1, 0, 1}; col varies slowest.
+        const int col = k / 3 - 1;
+        const int row = k % 3 - 1;
+        const vec2 grid_uv = vec2(col * 0.5, row * 0.5) * input_aspect;
+        vec3 pt = plane_o + grid_uv.x * plane_u + grid_uv.y * plane_v;
+        if (curv > 1.0e-3) {
+            // Project the plane point onto the curved surface.
+            // Simplification: Assume shape = 0 for sphere, = 1 for cylinder.
+            // This allows multiplication instead of branching.
+            // Simplification: Assume cylinder axis == plane_v.
+            const vec3 on_axis = shape * dot(pt, plane_v) * plane_v;
+            pt = on_axis + normalize(pt - on_axis) / curv;
+        }
+
+        // Tighten the boundary line offsets against this point.
+        for (int m = 0; m < 2; ++m) {
+            bounds[m].z = min(bounds[m].z, dot(bounds[m].xy, pt.xz));
+            bounds[m + 2].z =
+                min(bounds[m + 2].z, dot(bounds[m + 2].xy, pt.yz));
+        }
+        lo = min(lo, pt);
+        hi = max(hi, pt);
+    }
+
+    // Generate camera ray.
+    if (f < RT_CURV_F_MAX) {
+        // Perspective camera: place the camera where the boundary lines
+        // meet, then move it along z according to the zoom.
+        const vec2 apex_xz = line_intersection(bounds[0], bounds[1]);
+        const vec2 apex_yz = line_intersection(bounds[2], bounds[3]);
+        const float cam_z = min(apex_xz.y, apex_yz.y);
+        prim_ray_o = vec3(apex_xz.x, apex_yz.x, lo.z + (cam_z - lo.z) / zoom);
+        prim_ray_d = vec3((tex_coord - 0.5) * output_aspect, f);
+        return;
+    }
+
+    // Orthographic camera: parallel rays along +z, starting one unit in
+    // front of the nearest sampled point and spread to cover the extent.
+    const vec3 extent = hi - lo;
+    const vec2 center_xy = 0.5 * (lo.xy + hi.xy);
+    const float fit_scale =
+        max(extent.x / output_aspect.x, extent.y / output_aspect.y);
+    const vec2 offset_xy =
+        (tex_coord - 0.5) * output_aspect * fit_scale / zoom;
+    prim_ray_o = vec3(center_xy + offset_xy, lo.z - 1.0);
+    prim_ray_d = vec3(0.0, 0.0, 1.0);
+}
+
+vec2 trace_ray(vec2 input_aspect, vec3 prim_ray_o, vec3 prim_ray_d,
+               vec3 plane_n, vec3 plane_u, vec3 plane_v, float curv,
+               float shape) {
+    // Traces the primary ray against the curved surface (if curvature is
+    // enabled), then a secondary ray against the input plane, and returns
+    // the resulting input UV. Returns vec2(-1.0) if the primary ray misses
+    // the curved surface.
+    vec3 hit_o = prim_ray_o;
+    vec3 hit_d = prim_ray_d;
+
+    if (curv > 1.0e-3) {
+        // Intersect sphere / cylinder.
+        // Simplification: Assume shape = 0 for sphere, = 1 for cylinder.
+        // This allows multiplication instead of branching.
+        // Simplification: Assume cylinder axis == plane_v.
+        // For the cylinder, the component along the axis is removed from
+        // both ray direction and ray origin.
+        const vec3 dir_perp =
+            prim_ray_d - shape * dot(prim_ray_d, plane_v) * plane_v;
+        const vec3 org_perp =
+            prim_ray_o - shape * dot(prim_ray_o, plane_v) * plane_v;
+
+        // Quadratic a * t^2 + 2 * half_b * t + c = 0 with
+        // a = dot(dir_perp, dir_perp).
+        const float half_b = dot(dir_perp, org_perp);
+        const float c = dot(org_perp, org_perp) - 1.0 / (curv * curv);
+        const float discriminant =
+            half_b * half_b - dot(dir_perp, dir_perp) * c;
+        if (discriminant < 0.0) {
+            // Ray misses screen surface entirely.
+            return vec2(-1.0);
+        }
+
+        // We only need the smaller root of the two solutions for the
+        // ray-object intersection. The smaller root can be found as c / q,
+        // according to: https://www.av8n.com/physics/quadratic-formula.htm
+        // Simplification: Assume the solution is positive.
+        // Simplification: Assume half_b < 0.
+        const float t_surface = c / (sqrt(discriminant) - half_b);
+        // The secondary ray starts at the point hit on the screen surface.
+        hit_o = prim_ray_o + t_surface * prim_ray_d;
+        // Its direction is the hit point with the axis component removed
+        // for the cylinder (shape = 1), or the hit point itself for the
+        // sphere (shape = 0).
+        hit_d = hit_o - shape * dot(hit_o, plane_v) * plane_v;
+    }
+
+    // Intersect plane. plane_n serves as both normal and plane origin:
+    // t = dot(plane_n - hit_o, plane_n) / dot(plane_n, hit_d).
+    // Simplification: Assume t > 0.
+    // Simplification: Assume denominator is not close to zero.
+    const float t_plane = dot(plane_n - hit_o, plane_n) / dot(plane_n, hit_d);
+    // Offset of the plane intersection relative to the plane origin.
+    const vec3 on_plane = hit_o + t_plane * hit_d - plane_n;
+
+    // Convert plane intersection to input UV.
+    const vec2 centered_uv = vec2(dot(on_plane, plane_u / input_aspect.x),
+                                  dot(on_plane, plane_v / input_aspect.y));
+    return centered_uv + 0.5;
+}
--- a/misc/shaders/input_transform/input_transform.inc
+++ b/misc/shaders/input_transform/input_transform.inc
@ -30,45 +30,7 @@
    v1.0: Initial conversion from blur_fill release. Add rotation support.
 */

-vec2 get_rotated_size(vec2 x, uint rotation) {
-    switch (rotation) {
-        case 0:
-        case 2:
-        default:
-            return x;
-        case 1:
-        case 3:
-            return x.yx;
-    }
-}
-
-vec4 get_rotated_crop(vec4 crop, uint rotation) {
-    switch (rotation) {
-        case 0:
-        default:
-            return crop;
-        case 1:
-            return crop.yzwx;
-        case 2:
-            return crop.zwxy;
-        case 3:
-            return crop.wxyz;
-    }
-}
-
-vec2 get_rotated_vector(vec2 x, uint rotation) {
-    switch (rotation) {
-        case 0:
-        default:
-            return x;
-        case 1:
-            return vec2(-x.y, x.x);
-        case 2:
-            return -x;
-        case 3:
-            return vec2(x.y, -x.x);
-    }
-}
+#include "rotation.inc"

 // Get 2 corners of input in texel space, spanning the input image.
 // corners.x and .y define the top-left corner, corners.z and .w define the
--- a/misc/shaders/input_transform/rotation.inc
+++ b/misc/shaders/input_transform/rotation.inc
@ -0,0 +1,41 @@
+// See input_transform.inc for copyright and other information.
+
+vec2 get_rotated_size(vec2 x, uint rotation) {
+    // Swaps the two components for rotation values 1 and 3; every other
+    // value (0, 2, or anything else) leaves the size unchanged.
+    const bool swap_axes = rotation == 1u || rotation == 3u;
+    return swap_axes ? x.yx : x;
+}
+
+vec4 get_rotated_crop(vec4 crop, uint rotation) {
+    // Cyclically shifts the four crop components by `rotation` positions.
+    // Values other than 1, 2 and 3 return the crop unchanged.
+    if (rotation == 1u) {
+        return crop.yzwx;
+    }
+    if (rotation == 2u) {
+        return crop.zwxy;
+    }
+    if (rotation == 3u) {
+        return crop.wxyz;
+    }
+    return crop;
+}
+
+vec2 get_rotated_vector(vec2 x, uint rotation) {
+    // Turns the vector by `rotation` quarter turns: 1 maps (x, y) to
+    // (-y, x), 2 negates the vector, 3 maps (x, y) to (y, -x). Any other
+    // value returns the vector unchanged.
+    if (rotation == 1u) {
+        return vec2(-x.y, x.x);
+    }
+    if (rotation == 2u) {
+        return -x;
+    }
+    if (rotation == 3u) {
+        return vec2(x.y, -x.x);
+    }
+    return x;
+}