mirror of
https://github.com/gligli/nulldc-360.git
synced 2025-04-02 11:11:56 -04:00
290 lines
No EOL
8.7 KiB
HLSL
290 lines
No EOL
8.7 KiB
HLSL
//Pixel Shader
|
|
//Pvr emulation using a pixel shader .. bahh :p
|
|
//pp_Texture -> 1 if texture is enabled , 0 if its not
|
|
//pp_Offset -> 1 if offset is enabled , 0 if its not (only valid when texture is enabled)
|
|
//pp_ShadInstr -> 0 to 3 , see pvr docs , valid only when texture is enabled
|
|
//pp_IgnoreTexA -> 1 if on 0 if off , valid only w/ textures on
|
|
//pp_UseAlpha -> 1 if on 0 if off , works when no textures are used too ?
|
|
//pp_FogCtrl
|
|
//misc #defines :
|
|
//ZBufferMode -> z buffer mode :p
|
|
//ZBufferMode : 0 -> fp fixup (nop)
|
|
//ZBufferMode : 1 -> fp Z emu (emulate fp on mantissa bits)
|
|
//ZBufferMode : 2 -> rescale (nop)
|
|
|
|
//Selected z buffer mode. Only mode 1 adds a DEPTH output to PSO and makes main() write it.
#define ZBufferMode 2
|
|
|
|
//TextureLookup -> function to use for texture lookup. One of TextureLookup_Normal,TextureLookup_Palette,TextureLookup_Palette_Bilinear
|
|
//Interpolated per-pixel inputs consumed by main()
struct pixel
|
|
{
|
|
float4 col : TEXCOORD1; //base colour ; main() divides it by uv.w before use
|
|
float4 offs : TEXCOORD2; //offset colour ; divided by uv.w, rgb is added, a is the per-vertex fog blend
|
|
float4 uv : TEXCOORD0; //xy(w) feed the texture lookups, z is passed to clip(), w is the divisor for col/offs and the fog/Z input
|
|
};
|
|
|
|
//Texture samplers
sampler2D samplr : register(s0); //main texture ; sampled by TextureLookup_Normal and TextureLookup_Palette_Nproj
|
|
sampler2D tex_pal : register(s1); //palette texture ; sampled by PalleteLookup
|
|
sampler1D fog_table: register(s2); //fog table ; sampled by fog_mode2
|
|
|
|
//Shader constants
float4 current_pal: register(c0); //.xy is added to the palette coordinate in PalleteLookup
|
|
float4 texture_size: register(c1); //.xy = texture size, .zw = 1/xy (used by the bilinear palette lookups)
|
|
float4 FOG_COL_VERT:register(c2); //.rgb = fog colour blended in using the offset colour alpha
|
|
float4 FOG_COL_RAM :register(c3); //.rgb = fog colour used together with the fog table
|
|
float4 FOG_DENSITY :register(c4); //.x scales the fog_mode2 input before the table lookup
|
|
|
|
//Plain texture fetch.
//uv is handed to tex2Dproj unchanged, so the projective divide is done by the intrinsic.
float4 TextureLookup_Normal(float4 uv)
{
    float4 texel = tex2Dproj(samplr, uv);
    return texel;
}
|
|
|
|
//utility function for pal. lookups :)
|
|
//Resolves a palette entry to a colour.
//pos : x = index , y = bank (z and w are not used)
//current_pal.xy is added to the coordinate before tex_pal is sampled.
float4 PalleteLookup(float4 pos)
{
    float2 pal_coord = pos.rg + current_pal.xy;
    return tex2D(tex_pal, pal_coord);
}
|
|
|
|
//Paletted texture fetch without filtering of the final colour:
//the projective fetch from the main texture is used as the palette coordinate.
float4 TextureLookup_Palette(float4 uv)
{
    return PalleteLookup(TextureLookup_Normal(uv));
}
|
|
|
|
/*
|
|
Bilinear filtering
|
|
Screen space perspective -> Texture space linear (*W)
|
|
Texture space quad, filtered
|
|
This also takes dx/dy/mipmaps into account correctly, but drkpvr so far does not generate pal. texture index mipmaps so i commented it out for now
|
|
*/
|
|
//Paletted fetch at an already projected 2D coordinate (no divide by w here).
//The tex2Dgrad form taking dx/dy is kept disabled in the inline comments.
float4 TextureLookup_Palette_Nproj(float2 uv/*,float2 dx,float2 dy*/)
{
    float4 index_texel = tex2D/*grad*/(samplr, uv/*,dx,dy*/);
    return PalleteLookup(index_texel);
}
|
|
|
|
//Bilinear filtering for paletted textures.
//The palette has to be resolved per texel, so the 2x2 footprint is fetched by hand
//(four palette lookups) and the resulting colours are blended here.
//uv : projective texture coordinate, xy is divided by w below.
//NOTE(review): the sample coordinates are floor(texel)/size, i.e. texel corners rather
//than texel centres -- confirm this selects the intended texels on the target hardware.
float4 TextureLookup_Palette_Bilinear(float4 uv)
{
    //Project texture to 2d tc space
    float2 tc = uv.xy / uv.w;
    /*
    float2 dx=ddx(tc.xy); //Get x derivatives for mip mapping
    float2 dy=ddy(tc.xy); //Get y derivatives for mip mapping
    */

    //To image space, shifted by half a texel, to get the top-left texel and the blend weights
    float2 texel_pos = tc * texture_size.xy - float2(0.5, 0.5);
    float2 blend = frac(texel_pos);

    //Coordinates of the four texels : c<x><y>, 0 = left/top , 1 = right/bottom
    float2 c00 = floor(texel_pos) / texture_size.xy;
    float2 c10 = c00;
    c10.x += 1 / texture_size.x;
    float2 c11 = c10;
    c11.y += 1 / texture_size.y;
    float2 c01 = float2(c00.x, c11.y);

    float4 top_left  = TextureLookup_Palette_Nproj(c00 /*,dx,dy*/);
    float4 top_right = TextureLookup_Palette_Nproj(c10 /*,dx,dy*/);
    float4 bot_right = TextureLookup_Palette_Nproj(c11 /*,dx,dy*/);
    float4 bot_left  = TextureLookup_Palette_Nproj(c01 /*,dx,dy*/);

    //blend.x : 0 -> left , 1 -> right ; blend.y : 0 -> top , 1 -> bottom
    float4 upper = lerp(top_left, top_right, blend.x);
    float4 lower = lerp(bot_left, bot_right, blend.x);
    return lerp(upper, lower, blend.y);
}
|
|
|
|
//Alternative bilinear paletted lookup: the four sample coordinates come from a single
//floor/ceil pair scaled by texture_size.zw (zw = 1/xy) instead of stepping by one texel.
//uv : projective texture coordinate, xy is divided by w below.
float4 TextureLookup_Palette_Bilinear_ko(float4 uv)
{
    //Project texture to 2d tc space
    float2 tc = uv.xy / uv.w;
    /*
    float2 dx=ddx(tc.xy); //Get x derivatives for mip mapping
    float2 dy=ddy(tc.xy); //Get y derivatives for mip mapping
    */

    //To image space, shifted by half a texel
    float2 texel_pos = tc * texture_size.xy - float2(0.5, 0.5);

    //xy = left/top (floor) , zw = right/bottom (ceil)
    float4 corners = float4(floor(texel_pos), ceil(texel_pos)) * texture_size.zwzw;
    float2 blend = frac(texel_pos);

    //blend.x : 0 -> left , 1 -> right
    float4 upper = lerp(TextureLookup_Palette_Nproj(corners.xy /*,dx,dy*/),
                        TextureLookup_Palette_Nproj(corners.zy /*,dx,dy*/),
                        blend.x);
    float4 lower = lerp(TextureLookup_Palette_Nproj(corners.xw /*,dx,dy*/),
                        TextureLookup_Palette_Nproj(corners.zw /*,dx,dy*/),
                        blend.x);

    //blend.y : 0 -> top , 1 -> bottom
    return lerp(upper, lower, blend.y);
}
|
|
|
|
//same as below, but uses fewer sm2 opcodes so that the damn shader can fit on 2_0 cards
|
|
//Splits flt into exponent and mantissa through log2.
//e      : receives log2(flt) minus its fractional part (the exponent)
//return : 2^frac(log2(flt)) (the mantissa)
//flt must be greater than 0, log2 is taken of it directly.
float fdecp(float flt,out float e)
{
    float log_val  = log2(flt);     //ie , 2.5
    float log_frac = frac(log_val); //ie , 0.5
    e = log_val - log_frac;         //ie , 2.5-0.5=2 (exp)
    return pow(2, log_frac);        //2^0.5 (mantissa)
}
|
|
/*
|
|
float fdecp(float flt,out float e)
|
|
{
|
|
//float fogexp=floor(log2(foginvW)); //0 ... 7
|
|
//float fogexp_pow=pow(2,fogexp); //0 ... 128
|
|
//float fogman=(foginvW/fogexp_pow); //[1,2) mantissa bits. that is 1.m
|
|
|
|
e=floor(log2(flt));
|
|
float powe=pow(2,e);
|
|
return (flt/powe);
|
|
}
|
|
*/
|
|
//compress Z to D{s6e18}S8
|
|
//Packs w into a single value in the [0..1) range, exponent and mantissa combined.
//Target layout is s6e18 : max 2^16*(2^18-1)/2^18 , min 2^-47*(2^18-1)/2^18.
//w must not be 0, it is not a valid input for the decomposition.
float CompressZ(float w)
{
    float exponent;
    float mantissa = fdecp(w, exponent); //mantissa bits, always in the [1..2) range

    //Clamp to the representable exponents, then bias to positive : result is in [-1,62].
    //The missing +1 of the bias comes from the mantissa, which is at least 1.
    float biased_exp = clamp(exponent - 16, -63, 0) + 62;

    //Exponent + mantissa combined : min is 0 (-1+1), max is 63+(2^18-1)/2^18.
    //Normalised by 64 so that it falls in the [0..1) range.
    return (biased_exp + mantissa) / 64.0f;
}
|
|
//Fog coefficient from the fog table.
//invW * FOG_DENSITY.x is clamped to [1,255] (1.m0.7 mantissa, 3 bit exponent : min 1<<0 = 1,
//max 1.1111111<<7 = 255) and split into exponent and mantissa.
//Table index = exponent:m[6:3] -> 0 .. 127 ; the remaining mantissa bits blend between
//the two coefficients stored for that index.
//return : lerp(FOG[idx][0],FOG[idx][1],blend)
float fog_mode2(float invW)
{
    float scaled = clamp(FOG_DENSITY.x * invW, 1, 255);

    float exp_part;                            //0 ... 7
    float man_part = fdecp(scaled, exp_part);  //[1,2) mantissa bits. that is 1.m

    float man16 = man_part * 16 - 16;          //[16,32) -16 -> [0,16)
    float entry = exp_part * 16 + floor(man16);//[0,127]
    float blend = frac(man16);                 //[0,1) -- same as man16-floor(man16)

    //D3D9 maps [0.0, 1.0] to texel space [-0.5, n-0.5], so +0.5 is added to the index
    //before normalising by the table width, giving coordinates in [0.5/128,127.5/128].
    float coord = (entry + 0.5f) / 128;

    //fog is a 128x1 texture, both coefficients of an entry live in one texel
    //ARGB 8888 -> B G R A -> B=7:0 aka '1', G=15:8 aka '0'
    float2 pair = tex1D(fog_table, coord).rg;

    //the blend between the two coefficients is done manually here
    return lerp(pair.r, pair.g, blend);
}
|
|
|
|
//Pixel shader output written by main()
struct PSO
|
|
{
|
|
float4 col:COLOR0; //final pixel colour
|
|
//depth output exists only in z buffer mode 1 ; main() stores 1.0-CompressZ(uv.w) in it
#if ZBufferMode==1
|
|
float z :DEPTH;
|
|
#endif
|
|
};
|
|
|
|
//pvr only supports ARGB8888 colors, but they are pre-clamped on the vertex shader (no need to do it here)
|
|
//Pixel shader entry point.
//Order of operations : base colour -> alpha override -> fog table colour replace
//-> texture + shading instruction -> offset colour / vertex fog -> fog table blend.
//pp_FogCtrl/pp_FogCtrl2 and pp_ShadInstr/pp_ShadInstr2 are tested as pairs of flags
//(presumably the two bits of the matching 2 bit pvr fields -- TODO confirm against the host code).
PSO main(in pixel s )
{
    float4 pix = s.col / s.uv.w;
    clip(s.uv.z);

    //For non-old ATI cards, fixes noised cars in Jet Grind Radio for example.
    //pix=round(pix*255)/255;

    if (!pp_UseAlpha) {
        pix.a = 1;
    }

    //Both fog flags set : colour is replaced by the fog colour, alpha by the table coefficient
    if (pp_FogCtrl && pp_FogCtrl2) {
        pix = float4(FOG_COL_RAM.rgb, fog_mode2(s.uv.w));
    }

    if (pp_Texture) {
        //get texture color
        float4 tex;
        if (pp_Palette) {
            if (pp_PaletteBilinear) {
                tex = TextureLookup_Palette_Bilinear(s.uv);
            } else {
                tex = TextureLookup_Palette(s.uv);
            }
        } else {
            tex = TextureLookup_Normal(s.uv);
        }

        //apply modifiers
        if (pp_IgnoreTexA) {
            tex.a = 1;
        }

        //OFFSETRGB is always added after the shading instruction (if enabled)
        if (pp_ShadInstr) {
            //PIXRGB = COLRGB x TEXRGB + OFFSETRGB , for both variants
            pix.rgb *= tex.rgb;
            if (pp_ShadInstr2) {
                //PIXA = COLA x TEXA
                pix.a *= tex.a;
            } else {
                //PIXA = TEXA
                pix.a = tex.a;
            }
        } else if (pp_ShadInstr2) {
            //PIXRGB = (TEXRGB x TEXA) + (COLRGB x (1- TEXA) ) + OFFSETRGB
            //PIXA = COLA , so alpha is left untouched
            pix.rgb = (tex.rgb * tex.a) + (pix.rgb * (1 - tex.a));
        } else {
            //PIXRGB = TEXRGB + OFFSETRGB
            //PIXA = TEXA
            pix.rgb = tex.rgb;
            pix.a = tex.a;
        }

        //if offset is enabled , add it
        if (pp_Offset) {
            float4 offs_col = s.offs / s.uv.w;
            pix.rgb += offs_col.rgb;

            //Only the first fog flag set : blend towards the vertex fog colour by the offset alpha
            if (pp_FogCtrl && !pp_FogCtrl2) {
                pix.rgb = lerp(pix.rgb, FOG_COL_VERT.rgb, offs_col.a);
            }
        }
    }

    //No fog flag set : blend towards the fog colour by the table coefficient
    if (!(pp_FogCtrl || pp_FogCtrl2)) {
        pix.rgb = lerp(pix.rgb, FOG_COL_RAM.rgb, fog_mode2(s.uv.w));
    }

    PSO result;
    result.col = pix;
#if ZBufferMode==1
    result.z = 1.0 - CompressZ(s.uv.w);
#endif

    return result;
}