n64-emu/third_party/parallel-rdp-standalone/parallel-rdp/shaders/binning.h
2023-08-06 14:03:29 +09:00

146 lines
No EOL
5.1 KiB
C

/* Copyright (c) 2020 Themaister
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BINNING_H_
#define BINNING_H_
// There are 4 critical Y coordinates to test when binning. Top, bottom, mid, and mid - 1.
// SUBPIXELS_Y is the factor bin_primitive() uses to turn a Y coordinate into its first
// (y * SUBPIXELS_Y) and last (y * SUBPIXELS_Y + SUBPIXELS_Y - 1) sub-line index.
// interpolate_xs() also uses (SUBPIXELS_Y - 1) as a mask to clear the low bits of setup.yh,
// so the value is expected to stay a power of two.
const int SUBPIXELS_Y = 4;
// Drops the low 15 bits of every component of x.
// The shift is applied to signed integers, so the sign of each component is preserved.
ivec4 quantize_x(ivec4 x)
{
    const int DISCARDED_BITS = 15;
    ivec4 quantized = x >> DISCARDED_BITS;
    return quantized;
}
// Returns the smallest of the four components of v.
int minimum4(ivec4 v)
{
    int low_xy = min(v.x, v.y);
    int low_zw = min(v.z, v.w);
    return min(low_xy, low_zw);
}
// Returns the largest of the four components of v.
int maximum4(ivec4 v)
{
    int high_xy = max(v.x, v.y);
    int high_zw = max(v.z, v.w);
    return max(high_xy, high_zw);
}
// Computes a * b + c per component as a signed 64-bit value, without native 64-bit arithmetic.
//   a, b, c  - signed 32-bit operands (b and c are broadcast to all four components).
//   hi_bits  - receives the upper 32 bits of each 64-bit result.
//   returns  - the lower 32 bits of each 64-bit result.
ivec4 madd_32_64(ivec4 a, int b, int c, out ivec4 hi_bits)
{
    // Full signed 32x32 -> 64-bit product, split into high and low words.
    ivec4 lo, hi;
    imulExtended(a, ivec4(b), hi, lo);

    // Add the low words as unsigned values so the carry out of bit 31 is observable.
    uvec4 carry;
    lo = ivec4(uaddCarry(uvec4(lo), uvec4(c), carry));

    // c is a signed addend, so its 64-bit sign extension contributes (c >> 31), i.e. 0 or -1,
    // to the high word on top of the carry. Leaving that term out makes the high word one too
    // large whenever c is negative, which reports false overflow for small positive results
    // and hides genuine negative overflow.
    hi_bits = hi + ivec4(carry) + ivec4(c >> 31);
    return lo;
}
// Evaluates the triangle's edge X positions at the four Y samples in ys and reduces them to a
// single (left-most, right-most) pair of quantized X values.
//   setup   - triangle edge parameters (xh/xm/xl, dxhdy/dxmdy/dxldy, yh, ym).
//   ys      - the four Y positions to sample, already multiplied by scaling.
//   flip    - selects which edge is treated as the left side.
//   scaling - upscaling factor applied to the setup coordinates.
// Returns ivec2(min left X, max right X), or ivec2(0, 0x7fffffff) when any sample is out of
// range so that the caller bins conservatively.
ivec2 interpolate_xs(TriangleSetup setup, ivec4 ys, bool flip, int scaling)
{
    // XH and XM are interpolated from YH with its low (SUBPIXELS_Y - 1) bits cleared,
    // XL is interpolated from YM. Both bases are brought into the upscaled domain.
    int top_base = (setup.yh & ~(SUBPIXELS_Y - 1)) * scaling;
    int mid_base = setup.ym;
    mid_base *= scaling;

    ivec4 dy_from_top = ys - top_base;
    ivec4 dy_from_mid = ys - mid_base;

    // Interpolate in 64-bit so that overflow of the 32-bit result can be detected.
    ivec4 xh_hi, xm_hi, xl_hi;
    ivec4 xh = madd_32_64(dy_from_top, setup.dxhdy, scaling * setup.xh, xh_hi);
    ivec4 xm = madd_32_64(dy_from_top, setup.dxmdy, scaling * setup.xm, xm_hi);
    ivec4 xl = madd_32_64(dy_from_mid, setup.dxldy, scaling * setup.xl, xl_hi);

    // Samples that lie before the (scaled) mid-point take the XM edge instead of XL.
    bvec4 before_mid = lessThan(ys, ivec4(scaling * setup.ym));
    xl = mix(xl, xm, before_mid);
    xl_hi = mix(xl_hi, xm_hi, before_mid);

    // Saturate 64-bit signed to 32-bit signed without 64-bit math. The two conditions per
    // edge are mutually exclusive, so each component is replaced at most once.
    ivec4 saturated_max = ivec4(0x7fffffff);
    ivec4 saturated_min = ivec4(-0x80000000);

    bvec4 xh_too_large = greaterThan(xh_hi, ivec4(0));
    bvec4 xh_too_small = lessThan(xh_hi, ivec4(-1));
    xh = mix(xh, saturated_max, xh_too_large);
    xh = mix(xh, saturated_min, xh_too_small);

    bvec4 xl_too_large = greaterThan(xl_hi, ivec4(0));
    bvec4 xl_too_small = lessThan(xl_hi, ivec4(-1));
    xl = mix(xl, saturated_max, xl_too_large);
    xl = mix(xl, saturated_min, xl_too_small);

    ivec4 xh_quantized = quantize_x(xh);
    ivec4 xl_quantized = quantize_x(xl);

    // With flip set the XH edge is the left side; otherwise it is the right side.
    ivec4 xleft = flip ? xh_quantized : xl_quantized;
    ivec4 xright = flip ? xl_quantized : xh_quantized;

    // If any of the results is out of range we have overflow, and binning must be conservative.
    int largest_magnitude = maximum4(max(abs(xleft), abs(xright)));
    if (largest_magnitude > 2047 * scaling)
        return ivec2(0, 0x7fffffff);

    return ivec2(minimum4(xleft), maximum4(xright));
}
// Conservative test of whether a triangle can touch the rectangle [lo, hi] (inclusive).
//   setup   - triangle parameters (yh, ym, yl, flags and the edge data used by interpolate_xs).
//   lo, hi  - inclusive lower / upper corners of the rectangle being tested.
//   scaling - upscaling factor applied to setup and scissor coordinates.
//   scissor - scissor rectangle (xlo, xhi, ylo, yhi).
// Returns true when the rectangle may receive coverage, false when it certainly cannot.
bool bin_primitive(TriangleSetup setup, ivec2 lo, ivec2 hi, int scaling, ScissorState scissor)
{
    // Clip the rectangle's Y extent against the scissor first.
    int top_y = max(lo.y, scaling * (scissor.ylo >> 2));
    int bottom_y = min(hi.y, scaling * ((scissor.yhi + 3) >> 2) - 1);

    // Expand to sub-line indices, then clip against the triangle's own YH / YL.
    int start_y = max(top_y * SUBPIXELS_Y, scaling * int(setup.yh));
    int end_y = min((bottom_y * SUBPIXELS_Y) + (SUBPIXELS_Y - 1), scaling * int(setup.yl) - 1);

    // Nothing survives the Y clip, exit early.
    if (end_y < start_y)
        return false;

    bool flip = (setup.flags & TRIANGLE_SETUP_FLIP_BIT) != 0;

    // Sample the X ranges at the first and last Y, and at the two Y values around the
    // mid-point (clamped into the surviving Y range).
    ivec2 mid_ys = clamp(setup.ym * scaling + ivec2(-1, 0), ivec2(start_y), ivec2(end_y));
    ivec4 ys = ivec4(start_y, end_y, mid_ys);
    ivec2 x_range = interpolate_xs(setup, ys, flip, scaling);

    // For FILL_COPY_RASTER_BIT the right scissor edge is inclusive, otherwise exclusive.
    int x_bias = 3;
    if ((setup.flags & TRIANGLE_SETUP_FILL_COPY_RASTER_BIT) != 0)
        x_bias = 4;

    int scissor_left = scaling * (scissor.xlo >> 2);
    int scissor_right = scaling * ((scissor.xhi + x_bias) >> 2) - 1;

    // Scissor is applied through a clamp, with a mask generated for overshoot that decides
    // whether the line is valid. This is a conservative test, so the valid-line mask is not
    // computed here and the line has to be assumed valid.
    // Clamping the scissor to outside the primitive's range can create fake coverage in
    // FILL/COPY modes as long as at least one sub-line passes the scissor test. The x_range
    // ends up degenerate, but those fill modes are conservative and generate one pixel of
    // coverage anyway.
    x_range = clamp(x_range, ivec2(scissor_left), ivec2(scissor_right));

    // Finally intersect with the rectangle's own X extent.
    int left_x = max(x_range.x, lo.x);
    int right_x = min(x_range.y, hi.x);
    return left_x <= right_x;
}
#endif