// Mirror of https://github.com/kmc-jp/n64-emu.git (synced 2025-04-02 10:21:43 -04:00).
/* Copyright (c) 2020 Themaister
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef TEXTURE_H_
|
|
#define TEXTURE_H_
|
|
|
|
#include "data_structures.h"
|
|
|
|
// Texture format identifiers. sample_texture() compares TileInfo.fmt against these values.
const int TEXTURE_FORMAT_RGBA = 0;
const int TEXTURE_FORMAT_YUV = 1;
const int TEXTURE_FORMAT_CI = 2;
const int TEXTURE_FORMAT_IA = 3;
const int TEXTURE_FORMAT_I = 4;
|
|
|
|
// Wraps (and optionally mirrors) an S texel coordinate according to the tile's mask_s.
// A mask_s of zero disables masking and the coordinate is returned unchanged.
int texel_mask_s(TileInfo tile, int s)
{
    if (tile.mask_s == 0)
        return s;

    int wrap_bit = 1 << tile.mask_s;
    bool mirror = (tile.flags & TILE_INFO_MIRROR_S_BIT) != 0;

    if (mirror)
    {
        // When the wrap bit is set, this XORs with (wrap_bit - 1) and inverts the low bits.
        // When it is clear, (0 - 1) is clamped to 0 and the XOR is a no-op.
        s ^= max((s & wrap_bit) - 1, 0);
    }

    return s & (wrap_bit - 1);
}
|
|
|
|
// Two-wide variant of texel_mask_s(): masks the coordinates s and s + 1 together
// and returns them as the x and y components respectively.
ivec2 texel_mask_s_copy(TileInfo tile, int s)
{
    ivec2 pair = ivec2(s, s + 1);

    if (tile.mask_s == 0)
        return pair;

    int wrap_bit = 1 << tile.mask_s;

    if ((tile.flags & TILE_INFO_MIRROR_S_BIT) != 0)
    {
        // Per component: invert the low bits only where the wrap bit is set.
        pair ^= max((pair & wrap_bit) - 1, 0);
    }

    return pair & (wrap_bit - 1);
}
|
|
|
|
// Wraps (and optionally mirrors) a T texel coordinate according to the tile's mask_t.
// A mask_t of zero disables masking and the coordinate is returned unchanged.
int texel_mask_t(TileInfo tile, int t)
{
    if (tile.mask_t == 0)
        return t;

    int wrap_bit = 1 << tile.mask_t;
    bool mirror = (tile.flags & TILE_INFO_MIRROR_T_BIT) != 0;

    if (mirror)
    {
        // When the wrap bit is set, this XORs with (wrap_bit - 1) and inverts the low bits.
        // When it is clear, (0 - 1) is clamped to 0 and the XOR is a no-op.
        t ^= max((t & wrap_bit) - 1, 0);
    }

    return t & (wrap_bit - 1);
}
|
|
|
|
// Expands a packed 5:5:5:1 word into four components.
// The 5-bit colour fields sit at bits 11, 6 and 1; bit 0 selects alpha 0xff or 0.
i16x4 convert_rgba16(uint word)
{
    uvec3 field_shift = uvec3(11u, 6u, 1u);
    uvec3 rgb5 = (uvec3(word) >> field_shift) & 0x1fu;

    // Replicate the top bits into the low bits to widen 5 bits to 8.
    uvec3 rgb8 = (rgb5 << 3u) | (rgb5 >> 2u);

    uint alpha = (word & 1u) != 0u ? 0xffu : 0u;
    return i16x4(rgb8, alpha);
}
|
|
|
|
// Splits a word into intensity (bits 8 and above) and alpha (low 8 bits).
// Intensity is replicated into the first three components.
i16x4 convert_ia16(uint word)
{
    uvec2 ia = uvec2(word >> 8u, word & 0xffu);
    return i16x4(ia.xxx, ia.y);
}
|
|
|
|
// Fetches a 4-bit texel from the byte view of TMEM and replicates it to 8 bits in all four components.
i16x4 sample_texel_rgba4(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + (st.x >> 1u)) & 0xfffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    uint packed = uint(tmem8.instances[tmem_instance].elems[addr]);

    // Even texels occupy the high nibble, odd texels the low nibble.
    uint nibble = (st.x & 1u) == 0u ? (packed >> 4u) & 0xfu : packed & 0xfu;

    return i16x4(nibble | (nibble << 4u));
}
|
|
|
|
// Fetches a 4-bit intensity/alpha texel: the top three bits are intensity, the lowest bit is alpha.
i16x4 sample_texel_ia4(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + (st.x >> 1u)) & 0xfffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    uint packed = uint(tmem8.instances[tmem_instance].elems[addr]);

    // Even texels occupy the high nibble, odd texels the low nibble.
    uint nibble = (st.x & 1u) == 0u ? (packed >> 4u) & 0xfu : packed & 0xfu;

    // Spread the 3-bit intensity (still positioned at bits 3..1) across 8 bits.
    uint i3 = nibble & 0xeu;
    uint intensity = (i3 << 4u) | (i3 << 1u) | (i3 >> 2u);
    uint alpha = (nibble & 1u) * 0xffu;

    return i16x4(intensity, intensity, intensity, alpha);
}
|
|
|
|
// Fetches a 4-bit index and returns (pal << 4) | index in all four components, without any palette lookup.
i16x4 sample_texel_ci4(TileInfo tile, uint tmem_instance, uvec2 st, uint pal)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + (st.x >> 1u)) & 0xfffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    uint packed = uint(tmem8.instances[tmem_instance].elems[addr]);

    // Even texels occupy the high nibble, odd texels the low nibble.
    uint nibble = (st.x & 1u) == 0u ? (packed >> 4u) & 0xfu : packed & 0xfu;

    return i16x4(nibble | (pal << 4u));
}
|
|
|
|
// Fetches a 4-bit index, combines it with the palette number, and looks the result up in the
// 16-bit view of TMEM starting at element 0x400.
// lut_offset and addr_xor adjust which entry is read.
// tlut_type selects the decoding of the entry: true -> convert_ia16(), false -> convert_rgba16().
i16x4 sample_texel_ci4_tlut(TileInfo tile, uint tmem_instance, uvec2 st, uint pal, uint lut_offset, uint addr_xor, bool tlut_type)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + (st.x >> 1u)) & 0x7ffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    uint packed = uint(tmem8.instances[tmem_instance].elems[addr]);

    // Even texels occupy the high nibble, odd texels the low nibble.
    uint nibble = (st.x & 1u) == 0u ? (packed >> 4u) & 0xfu : packed & 0xfu;
    uint color_index = nibble | (pal << 4u);

    // Each colour index spans four consecutive LUT elements.
    uint lut_entry = ((color_index << 2u) + lut_offset) ^ addr_xor;
    uint entry = uint(tmem16.instances[tmem_instance].elems[0x400u | lut_entry]);

    if (tlut_type)
        return convert_ia16(entry);
    return convert_rgba16(entry);
}
|
|
|
|
// Fetches an 8-bit index and looks it up in the 16-bit view of TMEM starting at element 0x400.
// lut_offset and addr_xor adjust which entry is read.
// tlut_type selects the decoding of the entry: true -> convert_ia16(), false -> convert_rgba16().
i16x4 sample_texel_ci8_tlut(TileInfo tile, uint tmem_instance, uvec2 st, uint lut_offset, uint addr_xor, bool tlut_type)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + st.x) & 0x7ffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    uint color_index = uint(tmem8.instances[tmem_instance].elems[addr]);

    // Each colour index spans four consecutive LUT elements.
    uint lut_entry = ((color_index << 2u) + lut_offset) ^ addr_xor;
    uint entry = uint(tmem16.instances[tmem_instance].elems[0x400u | lut_entry]);

    if (tlut_type)
        return convert_ia16(entry);
    return convert_rgba16(entry);
}
|
|
|
|
// Fetches one 16-bit word and returns its (high byte, low byte) pair repeated twice.
i16x4 sample_texel_ci32(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint elem = ((row_base + st.x * 2u) & 0xfffu) >> 1u;

    // Odd rows flip bit 1 of the element index; the ^1 swaps the two 16-bit elements of each pair.
    elem ^= ((st.y & 1u) << 1u) ^ 1u;

    uint word = uint(tmem16.instances[tmem_instance].elems[elem]);
    i16x2 hi_lo = i16x2(word >> 8u, word & 0xffu);
    return hi_lo.xyxy;
}
|
|
|
|
// Fetches one 16-bit word, uses the bits above bit 7 as a colour index, and looks it up in the
// 16-bit view of TMEM starting at element 0x400.
// tlut_type selects the decoding of the entry: true -> convert_ia16(), false -> convert_rgba16().
i16x4 sample_texel_ci32_tlut(TileInfo tile, uint tmem_instance, uvec2 st, uint lut_offset, uint addr_xor, bool tlut_type)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint elem = ((row_base + st.x * 2u) & 0x7ffu) >> 1u;

    // Odd rows flip bit 1 of the element index; the ^1 swaps the two 16-bit elements of each pair.
    elem ^= ((st.y & 1u) << 1u) ^ 1u;

    uint word = uint(tmem16.instances[tmem_instance].elems[elem]);

    // (word >> 6) & ~3 equals (word >> 8) << 2, i.e. the upper bits scaled by four elements.
    uint lut_entry = (((word >> 6u) & ~3u) + lut_offset) ^ addr_xor;
    uint entry = uint(tmem16.instances[tmem_instance].elems[0x400u | lut_entry]);

    if (tlut_type)
        return convert_ia16(entry);
    return convert_rgba16(entry);
}
|
|
|
|
// Fetches one byte from the 8-bit view of TMEM and replicates it into all four components.
i16x4 sample_texel_rgba8(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + st.x) & 0xfffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    return i16x4(uint(tmem8.instances[tmem_instance].elems[addr]));
}
|
|
|
|
// Fetches one byte holding intensity (bits 4 and above) and alpha (low nibble).
// Each value is OR-ed with itself shifted left by four to widen it.
i16x4 sample_texel_ia8(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint addr = (row_base + st.x) & 0xfffu;

    // Odd rows flip bit 2 of the address; the ^3 reverses the byte index within each group of four.
    addr ^= ((st.y & 1u) << 2u) ^ 3u;

    uint packed = uint(tmem8.instances[tmem_instance].elems[addr]);

    uint i4 = packed >> 4u;
    uint a4 = packed & 0xfu;
    uint intensity = i4 | (i4 << 4u);
    uint alpha = a4 | (a4 << 4u);

    return i16x4(intensity, intensity, intensity, alpha);
}
|
|
|
|
// Fetches a YUV texel. Luma is read from the byte view of TMEM at offset 0x800 and above;
// chroma is read as one 16-bit element from the lower half at horizontal position chroma_x.
// Returns (u - 0x80, v - 0x80, luma, luma), where u is the chroma high byte and v the low byte.
i16x4 sample_texel_yuv16(TileInfo tile, uint tmem_instance, uvec2 st, uint chroma_x)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint odd_row = st.y & 1u;

    // Luma address: byte granularity, same row swizzle as the other 8-bit fetches.
    uint luma_addr = (row_base + st.x) & 0x7ffu;
    luma_addr ^= (odd_row << 2u) ^ 3u;

    // Chroma address: 16-bit granularity.
    uint chroma_elem = ((row_base + chroma_x * 2u) & 0x7ffu) >> 1u;
    chroma_elem ^= (odd_row << 1u) ^ 1u;

    u8 luma = u8(tmem8.instances[tmem_instance].elems[luma_addr | 0x800]);
    u16 chroma = u16(tmem16.instances[tmem_instance].elems[chroma_elem]);

    u8 u = u8((chroma >> U16_C(8)) & U16_C(0xff));
    u8 v = u8((chroma >> U16_C(0)) & U16_C(0xff));

    // Re-centre chroma around zero.
    i16 centered_u = i16(u) - I16_C(0x80);
    i16 centered_v = i16(v) - I16_C(0x80);

    return i16x4(centered_u, centered_v, luma, luma);
}
|
|
|
|
// Fetches one 16-bit element from TMEM and decodes it with convert_rgba16().
i16x4 sample_texel_rgba16(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint elem = ((row_base + st.x * 2u) & 0xfffu) >> 1u;

    // Odd rows flip bit 1 of the element index; the ^1 swaps the two 16-bit elements of each pair.
    elem ^= ((st.y & 1u) << 1u) ^ 1u;

    return convert_rgba16(uint(tmem16.instances[tmem_instance].elems[elem]));
}
|
|
|
|
// Fetches one 16-bit element from TMEM and decodes it with convert_ia16().
i16x4 sample_texel_ia16(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint elem = ((row_base + st.x * 2u) & 0xfffu) >> 1u;

    // Odd rows flip bit 1 of the element index; the ^1 swaps the two 16-bit elements of each pair.
    elem ^= ((st.y & 1u) << 1u) ^ 1u;

    return convert_ia16(uint(tmem16.instances[tmem_instance].elems[elem]));
}
|
|
|
|
// Fetches a 32-bit texel split across two 16-bit elements: one in the lower 0x400 elements of
// TMEM and its partner 0x400 elements above. The lower element supplies the first two components
// (high byte, low byte) and the upper element supplies the last two.
i16x4 sample_texel_rgba32(TileInfo tile, uint tmem_instance, uvec2 st)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint elem = ((row_base + st.x * 2u) & 0x7ffu) >> 1u;

    // Odd rows flip bit 1 of the element index; the ^1 swaps the two 16-bit elements of each pair.
    elem ^= ((st.y & 1u) << 1u) ^ 1u;

    uint first_half = uint(tmem16.instances[tmem_instance].elems[elem]);
    uint second_half = uint(tmem16.instances[tmem_instance].elems[elem | 0x400u]);

    return i16x4(first_half >> 8u, first_half & 0xffu, second_half >> 8u, second_half & 0xffu);
}
|
|
|
|
// Applies the tile shift to a texture coordinate and makes it relative to the tile origin `lo`.
// When clamp_bit is set, the result is additionally clamped to the [lo, hi] extent of the tile.
// Shift values below 11 shift right; values of 11 and above shift left by (16 - shift).
int clamp_and_shift_coord(bool clamp_bit, int coord, int lo, int hi, int shift)
{
    // Clamp 17-bit coordinate to 16-bit coordinate here.
    int shifted = clamp(coord, -0x8000, 0x7fff);

    // The left-shift case moves the value to the top of the 32-bit word and arithmetic-shifts
    // back down, which also discards any bits that overflow 16 bits.
    shifted = shift < 11 ? (shifted >> shift) : ((shifted << (32 - shift)) >> 16);

    int relative = shifted - (lo << 3);

    if (!clamp_bit)
        return relative;

    // At or beyond the upper edge: pin to the tile extent.
    if ((shifted >> 3) >= hi)
        return (((hi >> 2) - (lo >> 2)) & 0x3ff) << 5;

    // Otherwise only the lower edge needs clamping.
    return max(relative, 0);
}
|
|
|
|
// Applies the tile shift to a texture coordinate and makes it relative to the tile origin `lo`.
// Shift values below 11 shift right; values of 11 and above shift left by (16 - shift).
int shift_coord(int coord, int lo, int shift)
{
    // Clamp 17-bit coordinate to 16-bit coordinate here.
    int shifted = clamp(coord, -0x8000, 0x7fff);

    // The left-shift case moves the value to the top of the 32-bit word and arithmetic-shifts
    // back down, which also discards any bits that overflow 16 bits.
    shifted = shift < 11 ? (shifted >> shift) : ((shifted << (32 - shift)) >> 16);

    return shifted - (lo << 3);
}
|
|
|
|
// The copy pipe reads 4x16 words.
// Returns one of those 16-bit words, selected by s_offset, for the texel at `st`
// (already in whole-texel units). With `tlut` set, the fetched index is looked up in the
// upper half of TMEM. `tlut_type` is accepted for signature symmetry but not used here.
int sample_texture_copy_word(TileInfo tile, uint tmem_instance, ivec2 st, int s_offset, bool tlut, bool tlut_type)
{
    int texel_size = int(tile.size);
    int s_shamt = min(texel_size, 2);
    int idx_mask = (texel_size == 3 || tlut) ? 0x3ff : 0x7ff;

    // For non-16bpp TMEM, the lower 32-bits are sampled based on direct 16-bit fetches. There are no shifts applied.
    bool high_word = s_offset < 2;
    bool replicate_8bpp = high_word && texel_size != 2 && !tlut;

    // The row address depends only on T, so it is shared by both paths.
    int t = texel_mask_t(tile, st.y);
    uint tbase = tile.offset + tile.stride * t;
    uint row_swizzle = (t & 1u) * 8u;

    if (replicate_8bpp)
    {
        // The high word of 8-bpp replication is special in the sense that we sample 8-bpp correctly.
        // Sample the two possible words.
        ivec2 s = texel_mask_s_copy(tile, st.x + 2 * s_offset);

        uvec2 nibble_offset = ((tbase * 2 + (s << s_shamt)) & 0x1fffu) ^ row_swizzle;
        uvec2 index = (nibble_offset >> 2u) & idx_mask;

        ivec2 pair = ivec2(int(tmem16.instances[tmem_instance].elems[index.x ^ 1]),
                           int(tmem16.instances[tmem_instance].elems[index.y ^ 1]));

        if (texel_size == 1)
        {
            // Pick the byte addressed by bit 1 of the nibble offset.
            pair = (pair >> (8 - 4 * ivec2(nibble_offset & 2u))) & 0xff;
        }
        else if (texel_size == 0)
        {
            // Pick the addressed nibble and replicate it to a full byte.
            pair = ((pair >> (12 - 4 * ivec2(nibble_offset & 3u))) & 0xf) * 0x11;
        }
        else
        {
            pair >>= 8;
        }

        return (pair.x << 8) | pair.y;
    }

    int s = texel_mask_s(tile, st.x + s_offset);

    uint nibble_offset = ((tbase * 2 + (s << s_shamt)) & 0x1fffu) ^ row_swizzle;
    uint index = (nibble_offset >> 2u) & idx_mask;

    int samp = int(tmem16.instances[tmem_instance].elems[index ^ 1]);
    if (!tlut)
        return samp;

    // Reduce the fetched word to a colour index.
    if (tile.size == 0)
    {
        samp >>= 12 - 4 * (nibble_offset & 3);
        samp &= 0xf;
        samp |= tile.palette << 4;
    }
    else
    {
        samp >>= 8 - 4 * (nibble_offset & 2);
        samp &= 0xff;
    }

    // Each colour index spans four LUT elements; s_offset picks one of them.
    samp <<= 2;
    samp += s_offset;

    return int(tmem16.instances[tmem_instance].elems[(samp | 0x400) ^ 1]);
}
|
|
|
|
// Copy-pipe texture fetch. Shifts the coordinates into the tile's space, converts them to
// whole texels, and returns a value sized by global_constants.fb_info.fb_size:
//   0 -> always 0,
//   1 -> one byte of the copy word (even s_offset selects the high byte, odd the low byte),
//   otherwise -> the full copy word.
int sample_texture_copy(TileInfo tile, uint tmem_instance, ivec2 st, int s_offset, bool tlut, bool tlut_type)
{
    ivec2 texel = ivec2(shift_coord(st.x, int(tile.slo), int(tile.shift_s)),
                        shift_coord(st.y, int(tile.tlo), int(tile.shift_t))) >> 5;

    if (global_constants.fb_info.fb_size == 0)
        return 0;

    if (global_constants.fb_info.fb_size != 1)
        return sample_texture_copy_word(tile, tmem_instance, texel, s_offset, tlut, tlut_type);

    // Two output bytes share one 16-bit word.
    int word = sample_texture_copy_word(tile, tmem_instance, texel, s_offset >> 1, tlut, tlut_type);
    int byte_shift = (s_offset & 1) != 0 ? 0 : 8;
    return (word >> byte_shift) & 0xff;
}
|
|
|
|
// Three-tap bilinear filter on two channels. `frac` holds 5-bit fractions.
// When frac.x + frac.y >= 32 the filter is anchored at t11 with swapped, inverted weights;
// otherwise it is anchored at t00. The result is rounded (+0x10) and scaled back by >> 5.
i16x2 bilinear_3tap(i16x2 t00, i16x2 t10, i16x2 t01, i16x2 t11, ivec2 frac)
{
    bool upper_half = (frac.x + frac.y) >= 32;

    i16x2 anchor;
    i16x2 weights;
    if (upper_half)
    {
        anchor = t11;
        weights = i16x2(32 - frac.yx);
    }
    else
    {
        anchor = t00;
        weights = i16x2(frac);
    }

    i16x2 accum = (t10 - anchor) * weights.x + (t01 - anchor) * weights.y;
    accum += I16_C(0x10);
    accum >>= I16_C(5);
    return accum + anchor;
}
|
|
|
|
// Applies the conversion factors to a texel.
// Each input component is first sign-extended from its low 9 bits. The third component (b) is
// the base value added to every output; the first two (r, g) are scaled by the factors with
// rounding (+0x80, >> 8). The fourth output is the base value itself.
i16x4 texture_convert_factors(i16x4 texel_in, i16x4 factors)
{
    ivec4 texel = bitfieldExtract(ivec4(texel_in), 0, 9);
    ivec4 k = ivec4(factors);

    ivec3 products = ivec3(k.x * texel.g,
                           k.y * texel.r + k.z * texel.g,
                           k.w * texel.r);

    ivec3 rgb = texel.b + ((products + 0x80) >> 8);
    return i16x4(rgb, texel.b);
}
|
|
|
|
// Samples the texture described by `tile` at coordinate `st` and returns the filtered and/or
// converted texel.
//
//   tile, tmem_instance : tile descriptor and the TMEM instance to read from.
//   st                  : texture coordinate; the low 5 bits after shifting are the fraction.
//   tlut                : fetch through the palette (sample_texel_*_tlut) instead of directly.
//   tlut_type           : forwarded to the TLUT fetchers (true -> IA16 entries, false -> RGBA16).
//   sample_quad         : also fetch the neighbouring texels t10 / t01 (non-TLUT path).
//   mid_texel_state     : enables the 4-tap average when both fractions are exactly 0x10.
//   convert_one         : combine the fetched footprint with prev_cycle instead of filtering.
//   bilerp              : enable bilinear filtering; when clear (and !convert_one) the result is
//                         run through texture_convert_factors().
//   conversion_factors  : factors for texture_convert_factors().
//   prev_cycle          : previous cycle's texel, used by the convert_one path.
//
// The four texel registers start at zero so that a tile.fmt value not covered by the switches
// below (or YUV combined with tlut) yields a deterministic result instead of reading
// uninitialized variables.
i16x4 sample_texture(TileInfo tile, uint tmem_instance, ivec2 st, bool tlut, bool tlut_type,
                     bool sample_quad, bool mid_texel_state, bool convert_one, bool bilerp,
                     i16x4 conversion_factors, i16x4 prev_cycle)
{
    st.x = clamp_and_shift_coord((tile.flags & TILE_INFO_CLAMP_S_BIT) != 0, st.x, int(tile.slo), int(tile.shi), int(tile.shift_s));
    st.y = clamp_and_shift_coord((tile.flags & TILE_INFO_CLAMP_T_BIT) != 0, st.y, int(tile.tlo), int(tile.thi), int(tile.shift_t));

    // The fraction only matters when a footprint is sampled.
    ivec2 frac;
    if (sample_quad || tlut)
        frac = st & 31;
    else
        frac = ivec2(0);

    int sum_frac = frac.x + frac.y;
    st >>= 5;

    int s0 = texel_mask_s(tile, st.x);
    int t0 = texel_mask_t(tile, st.y);
    int s1 = texel_mask_s(tile, st.x + 1);
    int t1 = texel_mask_t(tile, st.y + 1);

    // Very specific weird logic going on with t0 and t1.
    int tdiff = max(t1 - t0, -255);
    t1 = (t0 & 0xff) + tdiff;
    t0 &= 0xff;

    // Zero-initialized: see the function comment.
    i16x4 t_base = i16x4(0);
    i16x4 t10 = i16x4(0);
    i16x4 t01 = i16x4(0);
    i16x4 t11 = i16x4(0);
    bool mid_texel = all(bvec4(mid_texel_state, bilerp, equal(frac, ivec2(0x10))));

    // Captured before sum_frac is reset for the mid-texel case below.
    bool upper_lut = sum_frac >= 0x20;
    if (mid_texel)
    {
        // Ensure we sample all 4 texels.
        sum_frac = 0;
    }

    bool yuv = tile.fmt == TEXTURE_FORMAT_YUV;
    ivec2 base_st = sum_frac >= 0x20 ? ivec2(s1, t1) : ivec2(s0, t0);
    int chroma_frac = ((s0 & 1) << 4) | (frac.x >> 1);

    if (tlut)
    {
        if (!sample_quad)
        {
            // Weird mode where we sample a bilinear footprint with the 4 banks of TLUT instead.
            // Force the footprint to be sampled, but adjust the input coordinates instead.
            base_st = ivec2(s0, t0);
            s1 = s0;
            t1 = t0;
        }

        switch (int(tile.fmt))
        {
        case TEXTURE_FORMAT_RGBA:
        case TEXTURE_FORMAT_CI:
        case TEXTURE_FORMAT_IA:
        case TEXTURE_FORMAT_I:
        {
            // For TLUT, entries in the LUT are duplicated and we must make sure that we sample 3 different banks
            // when we look up the TLUT entry. In normal situations, this is irrelevant, but we're trying to be accurate here.
            bool upper = sum_frac >= 0x20;
            uint addr_xor = upper_lut ? 2 : 1;

            switch (int(tile.size))
            {
            case 0:
                t_base = sample_texel_ci4_tlut(tile, tmem_instance, base_st, tile.palette, upper ? 3 : 0, addr_xor, tlut_type);
                if (bilerp)
                {
                    t10 = sample_texel_ci4_tlut(tile, tmem_instance, ivec2(s1, t0), tile.palette, 1, addr_xor,
                                                tlut_type);
                    t01 = sample_texel_ci4_tlut(tile, tmem_instance, ivec2(s0, t1), tile.palette, 2, addr_xor,
                                                tlut_type);
                }
                if (mid_texel)
                {
                    t11 = sample_texel_ci4_tlut(tile, tmem_instance, ivec2(s1, t1), tile.palette, 3, addr_xor,
                                                tlut_type);
                }
                break;

            case 1:
                t_base = sample_texel_ci8_tlut(tile, tmem_instance, base_st, upper ? 3 : 0, addr_xor, tlut_type);
                if (bilerp)
                {
                    t10 = sample_texel_ci8_tlut(tile, tmem_instance, ivec2(s1, t0), 1, addr_xor, tlut_type);
                    t01 = sample_texel_ci8_tlut(tile, tmem_instance, ivec2(s0, t1), 2, addr_xor, tlut_type);
                }
                if (mid_texel)
                    t11 = sample_texel_ci8_tlut(tile, tmem_instance, ivec2(s1, t1), 3, addr_xor, tlut_type);
                break;

            default:
                t_base = sample_texel_ci32_tlut(tile, tmem_instance, base_st, upper ? 3 : 0, addr_xor, tlut_type);
                if (bilerp)
                {
                    t10 = sample_texel_ci32_tlut(tile, tmem_instance, ivec2(s1, t0), 1, addr_xor, tlut_type);
                    t01 = sample_texel_ci32_tlut(tile, tmem_instance, ivec2(s0, t1), 2, addr_xor, tlut_type);
                }
                if (mid_texel)
                    t11 = sample_texel_ci32_tlut(tile, tmem_instance, ivec2(s1, t1), 3, addr_xor, tlut_type);
                break;
            }
            break;
        }
        }
    }
    else
    {
        switch (int(tile.fmt))
        {
        case TEXTURE_FORMAT_RGBA:
            switch (int(tile.size))
            {
            case 0:
                t_base = sample_texel_rgba4(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba4(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba4(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba4(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 1:
                t_base = sample_texel_rgba8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba8(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba8(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba8(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 2:
                t_base = sample_texel_rgba16(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba16(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba16(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba16(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 3:
                t_base = sample_texel_rgba32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba32(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba32(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba32(tile, tmem_instance, ivec2(s1, t1));
                break;
            }
            break;

        case TEXTURE_FORMAT_YUV:
        {
            uint chroma_x0 = s0 >> 1;
            uint chroma_x1 = (s1 + (s1 - s0)) >> 1;

            // Only implement 16bpp for now. It's the only one that gives meaningful results.
            t_base = sample_texel_yuv16(tile, tmem_instance, ivec2(s0, t0), chroma_x0);
            if (sample_quad)
            {
                t10 = sample_texel_yuv16(tile, tmem_instance, ivec2(s1, t0), chroma_x1);
                t01 = sample_texel_yuv16(tile, tmem_instance, ivec2(s0, t1), chroma_x0);
                t11 = sample_texel_yuv16(tile, tmem_instance, ivec2(s1, t1), chroma_x1);
            }
            break;
        }

        case TEXTURE_FORMAT_CI:
            switch (int(tile.size))
            {
            case 0:
                t_base = sample_texel_ci4(tile, tmem_instance, base_st, tile.palette);
                if (sample_quad)
                {
                    t10 = sample_texel_ci4(tile, tmem_instance, ivec2(s1, t0), tile.palette);
                    t01 = sample_texel_ci4(tile, tmem_instance, ivec2(s0, t1), tile.palette);
                }
                if (mid_texel)
                    t11 = sample_texel_ci4(tile, tmem_instance, ivec2(s1, t1), tile.palette);
                break;

            case 1:
                t_base = sample_texel_rgba8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba8(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba8(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba8(tile, tmem_instance, ivec2(s1, t1));
                break;

            default:
                t_base = sample_texel_ci32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_ci32(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_ci32(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_ci32(tile, tmem_instance, ivec2(s1, t1));
                break;
            }
            break;

        case TEXTURE_FORMAT_IA:
            switch (int(tile.size))
            {
            case 0:
                t_base = sample_texel_ia4(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_ia4(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_ia4(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_ia4(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 1:
                t_base = sample_texel_ia8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_ia8(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_ia8(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_ia8(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 2:
                t_base = sample_texel_ia16(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_ia16(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_ia16(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_ia16(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 3:
                t_base = sample_texel_ci32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_ci32(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_ci32(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_ci32(tile, tmem_instance, ivec2(s1, t1));
                break;
            }
            break;

        case TEXTURE_FORMAT_I:
            switch (int(tile.size))
            {
            case 0:
                t_base = sample_texel_rgba4(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba4(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba4(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba4(tile, tmem_instance, ivec2(s1, t1));
                break;

            case 1:
                t_base = sample_texel_rgba8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_rgba8(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_rgba8(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_rgba8(tile, tmem_instance, ivec2(s1, t1));
                break;

            default:
                t_base = sample_texel_ci32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    t10 = sample_texel_ci32(tile, tmem_instance, ivec2(s1, t0));
                    t01 = sample_texel_ci32(tile, tmem_instance, ivec2(s0, t1));
                }
                if (mid_texel)
                    t11 = sample_texel_ci32(tile, tmem_instance, ivec2(s1, t1));
                break;
            }
            break;
        }
    }

    i16x4 accum;

    // This is esoteric gibberish for the most part ...

    // Basic ideas seem to be:
    // - If mid_texel is enabled and we end up sampling center pixel, replace any 3-tap bilinear with 4-tap average.
    // - If YUV is used, filtering is separate for RG (chroma) and BA (luma) channels. Upper / Mid signals are separate.
    // - For YUV, sampling without bilerp with sample_quad means picking either t00 or t11, the base texel of any 3-tap bilerp plane.
    //   Chroma and Luma planes are selected separately.
    //   Then, the texel is converted.
    // - If convert_one + sample_quad + bilerp is used, a whack mode is entered where the conversion factors are dynamic.
    //   This also needs to handle variants of MID / YUV.

    if (convert_one)
    {
        // bilerp + convert_one path. !bilerp + convert_one path is trivial and does not require sampling at all.
        // It is handled outside.

        // Sign-extend the low 9 bits of each component of the previous cycle's texel.
        ivec4 prev_sext = bitfieldExtract(ivec4(prev_cycle), 0, 9);
        if (sample_quad)
        {
            bool mid_rg = yuv ? all(bvec3(mid_texel_state, equal(ivec2(chroma_frac, frac.y), ivec2(0x10)))) : mid_texel;
            bool mid_ba = mid_texel;

            bool upper_ba = sum_frac >= 32;
            bool upper_rg = yuv ? ((chroma_frac + frac.y) >= 32 && !mid_rg) : upper_ba;

            ivec2 factors_rg = upper_rg ? prev_sext.gr : prev_sext.rg;
            ivec2 factors_ba = upper_ba ? prev_sext.gr : prev_sext.rg;

            // t11 vs t00 selection is already done for non-YUV. YUV needs to defer here.

            ivec2 converted_rg, converted_ba;
            if (mid_rg)
            {
                converted_rg = factors_rg.r * (t01.rg - t11.rg) +
                               factors_rg.g * (t10.rg - t11.rg) +
                               ((t_base.rg - t11.rg) << 6) + 0x80;
            }
            else
            {
                ivec2 base_rg = upper_rg && yuv ? t11.xy : t_base.xy;
                converted_rg = factors_rg.r * (t10.xy - base_rg) + factors_rg.g * (t01.xy - base_rg) + 0x80;
            }

            if (mid_ba)
            {
                converted_ba = factors_ba.r * (t01.ba - t11.ba) +
                               factors_ba.g * (t10.ba - t11.ba) +
                               ((t_base.ba - t11.ba) << 6) + 0x80;
            }
            else
            {
                ivec2 base_ba = upper_ba && yuv ? t11.zw : t_base.zw;
                converted_ba = factors_ba.r * (t10.zw - base_ba) + factors_ba.g * (t01.zw - base_ba) + 0x80;
            }

            ivec4 converted = ivec4(converted_rg, converted_ba);
            converted >>= 8;
            converted += prev_sext.b;
            accum = i16x4(converted);
        }
        else
            accum = i16x4(prev_sext.bbbb);
    }
    else if (yuv)
    {
        if (sample_quad)
        {
            i16x2 accum_chroma;
            i16x2 accum_luma;

            if (bilerp)
            {
                bool mid_chroma = all(bvec3(mid_texel_state, equal(ivec2(chroma_frac, frac.y), ivec2(0x10))));
                if (mid_chroma)
                    accum_chroma = (t_base.xy + t10.xy + t11.xy + t01.xy + I16_C(2)) >> I16_C(2);
                else
                    accum_chroma = bilinear_3tap(t_base.xy, t10.xy, t01.xy, t11.xy, ivec2(chroma_frac, frac.y));

                if (mid_texel)
                    accum_luma = (t_base.zw + t10.zw + t11.zw + t01.zw + I16_C(2)) >> I16_C(2);
                else
                    accum_luma = bilinear_3tap(t_base.zw, t10.zw, t01.zw, t11.zw, frac);
            }
            else
            {
                // Weird path. Seems to pick either t00 or t11 for purposes of nearest.
                // Bilinear footprint path, except it's not doing bilinear path.
                accum_luma = frac.x + frac.y >= 32 ? t11.zw : t_base.zw;
                accum_chroma = chroma_frac + frac.y >= 32 ? t11.xy : t_base.xy;
            }

            accum = i16x4(accum_chroma, accum_luma);
        }
        else
            accum = t_base;
    }
    else if (mid_texel)
    {
        // 4-tap average with rounding.
        accum = (t_base + t01 + t10 + t11 + I16_C(2)) >> I16_C(2);
    }
    else if (bilerp && (sample_quad || tlut))
    {
        // Same 3-tap filter as bilinear_3tap(), on all four channels.
        // t_base already holds t11 when sum_frac >= 32.
        i16x2 flip_frac = i16x2(sum_frac >= 32 ? (32 - frac.yx) : frac);
        accum = (t10 - t_base) * flip_frac.x;
        accum += (t01 - t_base) * flip_frac.y;
        accum += I16_C(0x10);
        accum >>= I16_C(5);
        accum += t_base;
    }
    else
        accum = t_base;

    // If we don't spend math on bilerp for this cycle, we get conversion instead.
    // This happens regardless of convert_one. Convert_one in cycle 1 only means we take the
    // previous texel cycle and perform some math on it.

    if (!bilerp && !convert_one)
        accum = texture_convert_factors(accum, conversion_factors);

    return accum;
}
|
|
|
|
// Computes the LOD fraction and, when tex_lod_en is set, offsets the two tile indices
// according to the selected mip level.
//
//   tile0, tile1         : tile indices, updated in place (wrapped to 0..7) when tex_lod_en is set.
//   lod_frac             : output LOD fraction.
//   max_level            : highest mip level available.
//   min_lod              : lower bound applied to the magnification fraction.
//   st, st_dx, st_dy     : coordinate at this pixel and at its X / Y neighbours.
//   perspective_overflow : forces the "distant" result with lod_frac 0xff.
//   sharpen_tex_en, detail_tex_en : select the sharpen / detail variants.
void compute_lod_2cycle(inout uint tile0, inout uint tile1, out i16 lod_frac, uint max_level, int min_lod,
                        ivec2 st, ivec2 st_dx, ivec2 st_dy,
                        bool perspective_overflow, bool tex_lod_en, bool sharpen_tex_en, bool detail_tex_en)
{
    bool plain_lod = !sharpen_tex_en && !detail_tex_en;
    bool is_magnified = false;
    bool is_distant = perspective_overflow;
    uint level = 0u;

    // Default for every "distant" outcome; overwritten below where a real fraction applies.
    lod_frac = i16(0xff);

    if (!perspective_overflow)
    {
        // Kinda abs, except it's 1 less than expected if negative.
        ivec2 delta_x = st_dx - st;
        delta_x ^= delta_x >> 31;
        ivec2 delta_y = st_dy - st;
        delta_y ^= delta_y >> 31;

        ivec2 per_axis = max(delta_x, delta_y);
        int span = max(per_axis.x, per_axis.y);

        if (span >= 0x4000)
        {
            is_distant = true;
            level = max_level;
        }
        else if (span < 32) // LOD < 0
        {
            is_distant = max_level == 0u;
            is_magnified = true;

            if (plain_lod)
                lod_frac = i16(is_distant ? 0xff : 0);
            else
                lod_frac = i16((max(min_lod, span) << 3) + (sharpen_tex_en ? -0x100 : 0));
        }
        else
        {
            int mip_base = max(findMSB(span >> 5), 0);
            is_distant = mip_base >= max_level;

            // A distant texel without sharpen/detail keeps lod_frac = 0xff and level 0.
            if (!(is_distant && plain_lod))
            {
                lod_frac = i16(((span << 3) >> mip_base) & 0xff);
                level = mip_base;
            }
        }
    }

    if (!tex_lod_en)
        return;

    if (is_distant)
        level = max_level;

    uint first = tile0 + level;

    if (detail_tex_en)
    {
        tile1 = (first + ((is_distant || is_magnified) ? 1u : 2u)) & 7u;
        tile0 = (first + (is_magnified ? 0u : 1u)) & 7u;
    }
    else
    {
        tile0 = first & 7u;

        bool same_tile = is_distant || (!sharpen_tex_en && is_magnified);
        tile1 = same_tile ? tile0 : ((tile0 + 1u) & 7u);
    }
}
|
|
|
|
#endif
|