nulldc-360/files/ps.hlsl

290 lines
No EOL
8.7 KiB
HLSL

//Pixel Shader
//Pvr emulation using a pixel shader .. bahh :p
//pp_Texture -> 1 if texture is enabled , 0 if its not
//pp_Offset -> 1 if offset is enabled , 0 if its not (only valid when texture is enabled)
//pp_ShadInstr -> 0 to 3 , see pvr docs , valid only when texture is enabled
//pp_IgnoreTexA -> 1 if on 0 if off , valid only w/ textures on
//pp_UseAlpha -> 1 if on 0 if off , works when no textures are used too ?
//pp_FogCtrl
//misc #defines :
//ZBufferMode -> selects how the depth value of the pixel is produced
//ZBufferMode : 0 -> fp fixup (nop : no code in this file is conditional on it)
//ZBufferMode : 1 -> fp Z emu (emulate fp on mantissa bits) : PSO gets a DEPTH output that main fills using CompressZ
//ZBufferMode : 2 -> rescale (nop : no code in this file is conditional on it)
#define ZBufferMode 2
//TextureLookup -> function to use for texture lookup.One of TextureLookup_Normal,TextureLookup_Palette,TextureLookup_Palette_Bilinear
//Interpolated inputs of the pixel shader (the parameter of main)
struct pixel
{
//base color ; main divides it by uv.w before using it
float4 col : TEXCOORD1;
//offset color ; main divides it by uv.w, adds .rgb to the pixel color and uses .a as the blend factor towards FOG_COL_VERT
float4 offs : TEXCOORD2;
//texture coordinates : projected by .w for the texture lookups, .z is handed to clip(), .w is also the input of fog_mode2 and CompressZ
float4 uv : TEXCOORD0;
};
//main texture ; for paletted lookups its .rg value is used as the palette coordinate (see PalleteLookup)
sampler2D samplr : register(s0);
//palette texture, read by PalleteLookup
sampler2D tex_pal : register(s1);
//fog table, read by fog_mode2 ; .r and .g of each entry are the two coefficients that get interpolated
sampler1D fog_table: register(s2);
//.xy is added to the palette coordinate in PalleteLookup
float4 current_pal: register(c0);
//.xy converts texture coordinates to texel space ; .zw is expected to be 1/.xy (per TextureLookup_Palette_Bilinear_ko)
float4 texture_size: register(c1);
//fog color blended in by main when pp_FogCtrl && !pp_FogCtrl2 (blend factor is offs.a)
float4 FOG_COL_VERT:register(c2);
//fog color used together with the fog_mode2 coefficient
float4 FOG_COL_RAM :register(c3);
//.x scales the value given to fog_mode2 before the table lookup
float4 FOG_DENSITY :register(c4);
//Plain (non paletted) lookup : projective fetch from the main texture sampler
float4 TextureLookup_Normal(float4 uv)
{
	float4 fetched=tex2Dproj(samplr,uv);
	return fetched;
}
//Palette fetch helper.
//pos : .x=index , .y=bank (.zw are not used)
//The coordinate is shifted by current_pal.xy before tex_pal is sampled.
float4 PalleteLookup(float4 pos)
{
	float2 pal_coord=pos.xy+current_pal.xy;
	return tex2D(tex_pal,pal_coord);
}
//Paletted lookup without filtering : fetch the index with the projective lookup, then resolve it through the palette
float4 TextureLookup_Palette(float4 uv)
{
	return PalleteLookup(TextureLookup_Normal(uv));
}
/*
Bilinear filtering
Screen space perspective -> Texture space linear (*W)
Texture space quad, filtered
This can also take dx/dy/mipmaps into account correctly, but drkpvr so far does not generate
mipmaps for paletted texture indices, so the gradient code is commented out for now.
*/
//Paletted lookup for coordinates that are already projected (no divide by w) : index fetch + palette resolve
//The gradient (dx/dy) arguments are kept as commented hints, they are not used.
float4 TextureLookup_Palette_Nproj(float2 uv/*,float2 dx,float2 dy*/)
{
	return PalleteLookup(tex2D/*grad*/(samplr,uv/*,dx,dy*/));
}
//Manual bilinear filter for paletted textures.
//The index texture can not be filtered by the sampler (blending indices is meaningless), so the
//four neighbouring texels are fetched one by one, resolved through the palette and blended here.
//uv : projective texture coordinate (xy is divided by w)
//returns the filtered palette color
float4 TextureLookup_Palette_Bilinear(float4 uv)
{
	float2 tcpoj=uv.xy/uv.w; //Project texture to 2d tc space
	/*
	float2 dx=ddx(tcpoj.xy); //Get x derivatives for mip mapping
	float2 dy=ddy(tcpoj.xy); //Get y derivatives for mip mapping
	*/
	//to image space ; the -0.5 makes floor() give the top-left texel of the 2x2 footprint and frac() the blend weights
	float2 Img=tcpoj*texture_size.xy-float2(0.5,0.5);
	float2 weight=frac(Img);
	float2 texel=1/texture_size.xy; //size of one texel in texture coordinates

	//Fetch at texel CENTRES ((i+0.5)/size), not at i/size : i/size is exactly the border between
	//texel i-1 and texel i, and which one a point sampled fetch returns there depends on the
	//rounding of the hardware. A wrong pick means a wrong palette index.
	float2 lt=(floor(Img)+float2(0.5,0.5))*texel; //centre of the top-left texel
	float2 rb=lt+texel; //centre of the bottom-right texel

	float4 top_left = TextureLookup_Palette_Nproj( lt /*,dx,dy*/);
	float4 top_right = TextureLookup_Palette_Nproj( float2(rb.x,lt.y) /*,dx,dy*/);
	float4 bot_left = TextureLookup_Palette_Nproj( float2(lt.x,rb.y) /*,dx,dy*/);
	float4 bot_right = TextureLookup_Palette_Nproj( rb /*,dx,dy*/);

	float4 top = lerp( top_left, top_right, weight.x ); //.x=0 -> left, .x=1 -> right
	float4 bot = lerp( bot_left, bot_right, weight.x );
	return lerp( top, bot, weight.y ); //.y=0 -> top , .y=1 -> bottom
}
//Alternative manual bilinear filter for paletted textures ; main in this file does not call it.
//Uses floor()/ceil() of the texel position for the two corners and texture_size.zw (expected to be 1/xy) to get back to texture coordinates.
//NOTE(review): the corners are multiples of 1/size, i.e. they appear to sit on texel borders rather than centres -- confirm before using this variant.
float4 TextureLookup_Palette_Bilinear_ko(float4 uv)
{
	float2 proj_tc=uv.xy/uv.w; //Project texture to 2d tc space
	/*
	float2 dx=ddx(proj_tc.xy); //Get x derivatives for mip mapping
	float2 dy=ddy(proj_tc.xy); //Get y derivatives for mip mapping
	*/
	float2 texel_pos=proj_tc*texture_size.xy-float2(0.5,0.5); //to image space to get the frac/ceil
	float2 corner_lt=floor(texel_pos)*texture_size.zw; //zw=1/xy
	float2 corner_rb=ceil(texel_pos)*texture_size.zw;
	float2 blend=frac(texel_pos);

	//upper row, blended left -> right
	float4 upper=lerp(
		TextureLookup_Palette_Nproj( corner_lt /*,dx,dy*/),
		TextureLookup_Palette_Nproj( float2(corner_rb.x,corner_lt.y) /*,dx,dy*/),
		blend.x);
	//lower row, blended left -> right
	float4 lower=lerp(
		TextureLookup_Palette_Nproj( float2(corner_lt.x,corner_rb.y) /*,dx,dy*/),
		TextureLookup_Palette_Nproj( corner_rb /*,dx,dy*/),
		blend.x);
	//blend the rows top -> bottom
	return lerp(upper,lower,blend.y);
}
//Splits a float into exponent and mantissa (same as the commented version below, but uses fewer sm2 opcodes so that the shader can fit on 2_0 cards)
//flt : value to decompose
//e   : (out) log2(flt) minus its fractional part, the exponent
//returns 2^frac(log2(flt)), the mantissa
float fdecp(float flt,out float e)
{
	float log_val=log2(flt); //ie , 2.5
	float log_frac=frac(log_val); //ie , 0.5
	e=log_val-log_frac; //ie , 2.5-0.5=2 (exp)
	return pow(2,log_frac); //2^0.5 (mantissa)
}
/*
float fdecp(float flt,out float e)
{
//float fogexp=floor(log2(foginvW)); //0 ... 7
//float fogexp_pow=pow(2,fogexp); //0 ... 128
//float fogman=(foginvW/fogexp_pow); //[1,2) mantissa bits. that is 1.m
e=floor(log2(flt));
float powe=pow(2,e);
return (flt/powe);
}
*/
//compress Z to D{s6e18}S8
//w : value to compress ; 0 is not a valid input
//returns (biased exponent + mantissa)/64, which falls in the [0..1) range
float CompressZ(float w)
{
	float expo;
	float mant=fdecp(w,expo); //mantissa, in the [1..2) range
	//s6e18 : clamp the exponent to [-63..0] after removing 16, then bias it by +62 -> [-1..62]
	//the remaining +1 of the bias comes from the mantissa added below
	expo=clamp(expo-16,-63,0)+62;
	//exponent and mantissa are stored together : min is 0 (-1+1), max is just below 64
	//normalised by 64 so that the result is in [0..1)
	return (expo+mant)/64.0f;
}
//Fog coefficient lookup from the fog table
//invW : value to look up ; it is scaled by FOG_DENSITY.x and clamped to [1,255]
//The scaled value is split in exponent (0..7) and mantissa [1,2) :
// table entry = exponent*16 + top 4 bits of the mantissa fraction -> 0 .. 127
// blend       = what is left of the mantissa fraction             -> [0,1)
//Each table entry stores a pair of coefficients in .r/.g ; the result is lerp(.r,.g,blend)
float fog_mode2(float invW)
{
	float scaled=clamp(FOG_DENSITY.x*invW,1,255);

	float exp_part; //0 ... 7
	float mant=fdecp(scaled,exp_part); //[1,2) mantissa bits. that is 1.m

	float mant_x16=mant*16-16; //[16,32) -16 -> [0,16)
	float entry=exp_part*16+floor(mant_x16); //[0,127]
	float blend=frac(mant_x16); //[0,1)

	//fog is a 128x1 texture : +0.5 addresses the centre of the entry, /128 normalises the coordinate
	float table_u=(entry+0.5f)/128;

	//.r and .g hold the two coefficients of the entry, the lerp is done manually
	float2 pair=tex1D(fog_table,table_u).rg;
	return lerp(pair.r,pair.g,blend);
}
//Output of the pixel shader (returned by main)
struct PSO
{
//final pixel color
float4 col:COLOR0;
#if ZBufferMode==1
//depth output, only present in Z emulation mode ; main writes 1-CompressZ(uv.w) here
float z :DEPTH;
#endif
};
//pvr only supports ARGB8888 colors, but they are pre-clamped on the vertex shader (no need to do it here)
//Pixel shader entry point.
//Builds the pixel color from the base color, the optional texture (pp_Texture) combined per
//pp_ShadInstr/pp_ShadInstr2, the optional offset color (pp_Offset) and the fog selected by
//pp_FogCtrl/pp_FogCtrl2. In ZBufferMode 1 it also writes the depth.
PSO main(in pixel s )
{
	float invW=s.uv.w;
	float4 color=s.col/invW;
	clip(s.uv.z);
#if 1
	//For non-old ATI cards, fixes noised cars in Jet Grind Radio for example.
	//color=round(color*255)/255;

	if (!pp_UseAlpha){
		color.a=1;
	}

	//both fog bits set : the color is replaced by the fog color, alpha is the fog coefficient
	if (pp_FogCtrl && pp_FogCtrl2){
		color=float4(FOG_COL_RAM.rgb,fog_mode2(invW));
	}

	if (pp_Texture){
		//get texture color
		float4 texcol;
		if (!pp_Palette){
			texcol=TextureLookup_Normal(s.uv);
		}else if (!pp_PaletteBilinear){
			texcol=TextureLookup_Palette(s.uv);
		}else{
			texcol=TextureLookup_Palette_Bilinear(s.uv);
		}

		//apply modifiers
		if (pp_IgnoreTexA){
			texcol.a=1;
		}

		//combine with the base color ; OFFSETRGB is always added after that (if enabled)
		if (pp_ShadInstr){
			//PIXRGB = COLRGB x TEXRGB + OFFSETRGB (both variants)
			color.rgb*=texcol.rgb;
			if (pp_ShadInstr2){
				//PIXA = COLA x TEXA
				color.a*=texcol.a;
			}else{
				//PIXA = TEXA
				color.a=texcol.a;
			}
		}else if (pp_ShadInstr2){
			//PIXRGB = (TEXRGB x TEXA) + (COLRGB x (1- TEXA) ) + OFFSETRGB
			//PIXA = COLA (color.a remains the same)
			color.rgb=(texcol.rgb*texcol.a) + (color.rgb * (1-texcol.a));
		}else{
			//PIXRGB = TEXRGB + OFFSETRGB
			//PIXA = TEXA
			color=texcol;
		}

		//if offset is enabled , add it :)
		if (pp_Offset){
			float4 offscol=s.offs/invW;
			color.rgb+=offscol.rgb;
			//only the first fog bit set : blend towards FOG_COL_VERT by the offset alpha
			if (pp_FogCtrl && !pp_FogCtrl2){
				color.rgb=lerp(color.rgb,FOG_COL_VERT.rgb,offscol.a);
			}
		}
	}

	//no fog bit set : blend towards FOG_COL_RAM by the fog table coefficient
	if (!pp_FogCtrl && !pp_FogCtrl2){
		color.rgb=lerp(color.rgb,FOG_COL_RAM.rgb,fog_mode2(invW));
	}
#endif

	PSO rv;
	rv.col=color;
#if ZBufferMode==1
	rv.z=1.0-CompressZ(invW);
#endif
	return rv;
}