Merge pull request #15317 from unknownbrackets/softgpu-lighting

softgpu: Precompute lighting parameters
This commit is contained in:
Henrik Rydgård 2022-01-17 01:06:35 +01:00 committed by GitHub
commit bdc69f5171
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 211 additions and 67 deletions

View file

@ -121,7 +121,7 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform,
if (gstate.isLightingEnabled()) {
// doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate() & 7);
id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate());
id.SetBit(VS_BIT_LIGHTING_ENABLE);
// Light bits
for (int i = 0; i < 4; i++) {

View file

@ -338,7 +338,7 @@ struct GPUgstate {
unsigned int getAmbientB() const { return (ambientcolor>>16)&0xFF; }
unsigned int getAmbientA() const { return ambientalpha&0xFF; }
unsigned int getAmbientRGBA() const { return (ambientcolor&0xFFFFFF) | ((ambientalpha&0xFF)<<24); }
unsigned int getMaterialUpdate() const { return materialupdate&0xFFFFFF; }
unsigned int getMaterialUpdate() const { return materialupdate & 7; }
unsigned int getMaterialAmbientR() const { return materialambient&0xFF; }
unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }
unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }

View file

@ -34,9 +34,6 @@ static inline Vec3f GetLightVec(u32 lparams[12], int light) {
}
static inline float pspLightPow(float v, float e) {
if (e <= 0.0f || (std::isnan(e) && std::signbit(e))) {
return 1.0f;
}
if (v > 0.0f) {
return pow(v, e);
}
@ -44,6 +41,102 @@ static inline float pspLightPow(float v, float e) {
return v;
}
// Decodes the lighting registers in gstate into *state once, so that
// Process() can light each vertex from the precomputed values.
//
// state:     Output. For a light that is not enabled only `enabled` is written
//            (the ambient/diffuse/specular flags may additionally be cleared below.)
// hasColor0: Whether the vertices carry a color.  When false, the material
//            update bits are ignored and the material colors are always used.
void ComputeState(State *state, bool hasColor0) {
	// Colors are stored as (c * 2 + 1) per channel, so a factor equal to `unit` means the color was 0.
	const Vec4<int> unit = Vec4<int>::AssignToAll(1);

	// Exponent cleanup used for both the spot and the specular exponent:
	// values <= 0 and negative NaN become 0, positive NaN becomes INFINITY.
	const auto sanitizeExp = [](float e) -> float {
		if (e <= 0.0f)
			return 0.0f;
		if (std::isnan(e))
			return std::signbit(e) ? 0.0f : INFINITY;
		return e;
	};

	bool ambientUsed = false;
	bool diffuseUsed = false;
	bool specularUsed = false;

	for (int i = 0; i < 4; ++i) {
		auto &l = state->lights[i];
		l.enabled = gstate.isLightChanEnabled(i);
		if (!l.enabled)
			continue;

		l.spot = gstate.isSpotLight(i);
		l.directional = gstate.isDirectionalLight(i);
		l.poweredDiffuse = gstate.isUsingPoweredDiffuseLight(i);
		l.specular = gstate.isUsingSpecularLight(i);

		// A component only counts as used when its light color is non-zero.
		l.ambientColorFactor = Vec4<int>::FromRGBA(gstate.getLightAmbientColor(i)) * 2 + unit;
		l.ambient = !(l.ambientColorFactor == unit);
		if (l.ambient)
			ambientUsed = true;

		l.diffuseColorFactor = Vec4<int>::FromRGBA(gstate.getDiffuseColor(i)) * 2 + unit;
		l.diffuse = !(l.diffuseColorFactor == unit);
		if (l.diffuse)
			diffuseUsed = true;

		// The specular color is only decoded when the light type uses specular at all.
		if (l.specular) {
			l.specularColorFactor = Vec4<int>::FromRGBA(gstate.getSpecularColor(i)) * 2 + unit;
			l.specular = !(l.specularColorFactor == unit);
			if (l.specular)
				specularUsed = true;
		}

		// Directional lights get a normalized pos; all others get attenuation coefficients instead.
		l.pos = GetLightVec(gstate.lpos, i);
		if (!l.directional) {
			l.att = GetLightVec(gstate.latt, i);
		} else {
			l.pos.NormalizeOr001();
		}

		if (l.spot) {
			l.spotDir = GetLightVec(gstate.ldir, i);
			l.spotDir.Normalize();

			// Only a negative NaN cutoff is replaced (by 0); other values are kept as is.
			const float cutoff = getFloat24(gstate.lcutoff[i]);
			l.spotCutoff = (std::isnan(cutoff) && std::signbit(cutoff)) ? 0.0f : cutoff;

			l.spotExp = sanitizeExp(getFloat24(gstate.lconv[i]));
		}
	}

	// Without a vertex color, none of the material components can come from it.
	const int updateMask = hasColor0 ? (gstate.materialupdate & 7) : 0;
	state->colorForAmbient = (updateMask & 1) != 0;
	state->colorForDiffuse = (updateMask & 2) != 0;
	state->colorForSpecular = (updateMask & 4) != 0;

	// The material ambient is always decoded when not taken from the vertex.
	if (!state->colorForAmbient) {
		state->material.ambientColorFactor = Vec4<int>::FromRGBA(gstate.getMaterialAmbientRGBA()) * 2 + unit;
		// A zero material ambient turns off the per-light ambient term everywhere.
		if (ambientUsed && state->material.ambientColorFactor == unit) {
			for (auto &l : state->lights)
				l.ambient = false;
		}
	}

	// Same idea for diffuse, but only bother when some light has diffuse at all.
	if (diffuseUsed && !state->colorForDiffuse) {
		state->material.diffuseColorFactor = Vec4<int>::FromRGBA(gstate.getMaterialDiffuse()) * 2 + unit;
		if (state->material.diffuseColorFactor == unit) {
			diffuseUsed = false;
			for (auto &l : state->lights)
				l.diffuse = false;
		}
	}

	// And for specular.
	if (specularUsed && !state->colorForSpecular) {
		state->material.specularColorFactor = Vec4<int>::FromRGBA(gstate.getMaterialSpecular()) * 2 + unit;
		if (state->material.specularColorFactor == unit) {
			specularUsed = false;
			for (auto &l : state->lights)
				l.specular = false;
		}
	}

	// The exponent is left unset when neither diffuse nor specular survived the checks above.
	if (diffuseUsed || specularUsed)
		state->specularExp = sanitizeExp(gstate.getMaterialSpecularCoef());

	state->baseAmbientColorFactor = Vec4<int>::FromRGBA(gstate.getAmbientRGBA()) * 2 + unit;

	// Specular goes to color1 or gets added into color0 - but only if there is any specular left.
	const bool secondaryColor = gstate.isUsingSecondaryColor();
	state->setColor1 = secondaryColor && specularUsed;
	state->addColor1 = !secondaryColor && specularUsed;
}
static inline float GenerateLightCoord(VertexData &vertex, const WorldCoords &worldnormal, int light) {
// TODO: Should specular lighting affect this, too? Doesn't in GLES.
Vec3<float> L = GetLightVec(gstate.lpos, light);
@ -60,36 +153,36 @@ void GenerateLightST(VertexData &vertex, const WorldCoords &worldnormal) {
vertex.texturecoords.t() = GenerateLightCoord(vertex, worldnormal, gstate.getUVLS1());
}
void Process(VertexData& vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, bool hasColor) {
const int materialupdate = gstate.materialupdate & (hasColor ? 7 : 0);
void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, const State &state) {
// Lighting blending rounds using the half offset method (like alpha blend.)
const Vec4<int> ones = Vec4<int>::AssignToAll(1);
Vec4<int> colorFactor;
if (state.colorForAmbient || state.colorForDiffuse || state.colorForSpecular)
colorFactor = vertex.color0 * 2 + ones;
Vec4<int> mec = Vec4<int>::FromRGBA(gstate.getMaterialEmissive());
Vec4<int> mac = (materialupdate & 1) ? vertex.color0 : Vec4<int>::FromRGBA(gstate.getMaterialAmbientRGBA());
Vec4<int> ac = Vec4<int>::FromRGBA(gstate.getAmbientRGBA());
// Ambient (whether vertex or material) rounds using the half offset method (like alpha blend.)
const Vec4<int> ones = Vec4<int>::AssignToAll(1);
Vec4<int> ambient = ((mac * 2 + ones) * (ac * 2 + ones)) / 1024;
Vec4<int> mac = state.colorForAmbient ? colorFactor : state.material.ambientColorFactor;
Vec4<int> ambient = (mac * state.baseAmbientColorFactor) / 1024;
Vec4<int> final_color = mec + ambient;
Vec4<int> specular_color = Vec4<int>::AssignToAll(0);
for (unsigned int light = 0; light < 4; ++light) {
if (!gstate.isLightChanEnabled(light))
const auto &lstate = state.lights[light];
if (!lstate.enabled)
continue;
// L = vector from vertex to light source
// TODO: Should transfer the light positions to world/view space for these calculations?
Vec3<float> L = GetLightVec(gstate.lpos, light);
if (!gstate.isDirectionalLight(light)) {
L -= worldpos;
}
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
float d = L.NormalizeOr001();
Vec3<float> L = lstate.pos;
float att = 1.0f;
if (!gstate.isDirectionalLight(light)) {
att = 1.0f / Dot(GetLightVec(gstate.latt, light), Vec3f(1.0f, d, d * d));
if (!lstate.directional) {
L -= worldpos;
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
float d = L.NormalizeOr001();
att = 1.0f / Dot(lstate.att, Vec3f(1.0f, d, d * d));
if (!(att > 0.0f))
att = 0.0f;
else if (att > 1.0f)
@ -97,17 +190,13 @@ void Process(VertexData& vertex, const WorldCoords &worldpos, const WorldCoords
}
float spot = 1.0f;
if (gstate.isSpotLight(light)) {
Vec3<float> dir = GetLightVec(gstate.ldir, light);
float rawSpot = Dot(dir.Normalized(cpu_info.bSSE4_1), L);
if (lstate.spot) {
float rawSpot = Dot(lstate.spotDir, L);
if (std::isnan(rawSpot))
rawSpot = std::signbit(rawSpot) ? 0.0f : 1.0f;
float cutoff = getFloat24(gstate.lcutoff[light]);
if (std::isnan(cutoff) && std::signbit(cutoff))
cutoff = 0.0f;
if (rawSpot >= cutoff) {
float conv = getFloat24(gstate.lconv[light]);
spot = pspLightPow(rawSpot, conv);
if (rawSpot >= lstate.spotCutoff) {
spot = pspLightPow(rawSpot, lstate.spotExp);
if (std::isnan(spot))
spot = 0.0f;
} else {
@ -116,54 +205,57 @@ void Process(VertexData& vertex, const WorldCoords &worldpos, const WorldCoords
}
// ambient lighting
int attspot = (int)ceilf(256 * 2 * att * spot + 1);
if (attspot > 512)
attspot = 512;
Vec4<int> lac = Vec4<int>::FromRGBA(gstate.getLightAmbientColor(light));
Vec4<int> lambient = ((mac * 2 + ones) * (lac * 2 + ones) * attspot) / (1024 * 512);
final_color += lambient;
// diffuse lighting
float diffuse_factor = Dot(L, worldnormal);
if (gstate.isUsingPoweredDiffuseLight(light)) {
float k = gstate.getMaterialSpecularCoef();
diffuse_factor = pspLightPow(diffuse_factor, k);
if (lstate.ambient) {
int attspot = (int)ceilf(256 * 2 * att * spot + 1);
if (attspot > 512)
attspot = 512;
Vec4<int> lambient = (mac * lstate.ambientColorFactor * attspot) / (1024 * 512);
final_color += lambient;
}
if (diffuse_factor > 0.0f) {
// diffuse lighting
float diffuse_factor;
if (lstate.diffuse || lstate.specular) {
diffuse_factor = Dot(L, worldnormal);
if (lstate.poweredDiffuse) {
diffuse_factor = pspLightPow(diffuse_factor, state.specularExp);
}
}
if (lstate.diffuse && diffuse_factor > 0.0f) {
int diffuse_attspot = (int)ceilf(256 * 2 * att * spot * diffuse_factor + 1);
if (diffuse_attspot > 512)
diffuse_attspot = 512;
Vec4<int> ldc = Vec4<int>::FromRGBA(gstate.getDiffuseColor(light));
Vec4<int> mdc = (materialupdate & 2) ? vertex.color0 : Vec4<int>::FromRGBA(gstate.getMaterialDiffuse());
Vec4<int> ldiffuse = ((ldc * 2 + ones) * (mdc * 2 + ones) * diffuse_attspot) / (1024 * 512);
Vec4<int> mdc = state.colorForDiffuse ? colorFactor : state.material.diffuseColorFactor;
Vec4<int> ldiffuse = (lstate.diffuseColorFactor * mdc * diffuse_attspot) / (1024 * 512);
final_color += ldiffuse;
}
if (gstate.isUsingSpecularLight(light) && diffuse_factor >= 0.0f) {
if (lstate.specular && diffuse_factor >= 0.0f) {
Vec3<float> H = L + Vec3<float>(0.f, 0.f, 1.f);
float specular_factor = Dot(H.NormalizedOr001(cpu_info.bSSE4_1), worldnormal);
float k = gstate.getMaterialSpecularCoef();
specular_factor = pspLightPow(specular_factor, k);
specular_factor = pspLightPow(specular_factor, state.specularExp);
if (specular_factor > 0.0f) {
int specular_attspot = (int)ceilf(256 * 2 * att * spot * specular_factor + 1);
if (specular_attspot > 512)
specular_attspot = 512;
Vec4<int> lsc = Vec4<int>::FromRGBA(gstate.getSpecularColor(light));
Vec4<int> msc = (materialupdate & 4) ? vertex.color0 : Vec4<int>::FromRGBA(gstate.getMaterialSpecular());
Vec4<int> lspecular = ((lsc * 2 + ones) * (msc * 2 + ones) * specular_attspot) / (1024 * 512);
Vec4<int> msc = state.colorForSpecular ? colorFactor : state.material.specularColorFactor;
Vec4<int> lspecular = (lstate.specularColorFactor * msc * specular_attspot) / (1024 * 512);
specular_color += lspecular;
}
}
}
if (gstate.isUsingSecondaryColor()) {
if (state.setColor1) {
vertex.color0 = final_color.Clamp(0, 255);
vertex.color1 = specular_color.Clamp(0, 255).rgb();
} else {
} else if (state.addColor1) {
vertex.color0 = (final_color + specular_color).Clamp(0, 255);
} else {
vertex.color0 = final_color.Clamp(0, 255);
}
}

View file

@ -21,7 +21,51 @@
namespace Lighting {
// Lighting parameters precomputed by ComputeState() and consumed by Process().
// All "ColorFactor" members hold (color * 2 + 1) per channel, ready to be
// multiplied together in the half offset rounding scheme used by Process().
struct State {
// Per-light values.  ComputeState() only writes `enabled` for a light that is
// not enabled, so the other members must not be read unless `enabled` is set.
struct {
// Pre-normalized if directional.
Vec3f pos;
// Attenuation coefficients, applied as Dot(att, (1, d, d * d)).  Only set for non-directional lights.
Vec3f att;
// Normalized spot direction.  Only set when `spot` is set.
Vec3f spotDir;
// Spot cutoff to compare the spot dot product against; a negative NaN is stored as 0.  Only set when `spot` is set.
float spotCutoff;
// Spot exponent, with values <= 0 and negative NaN stored as 0, and positive NaN as INFINITY.  Only set when `spot` is set.
float spotExp;
Vec4<int> ambientColorFactor;
Vec4<int> diffuseColorFactor;
// Only set when the light type uses specular.
Vec4<int> specularColorFactor;
struct {
bool enabled : 1;
bool spot : 1;
bool directional : 1;
// Diffuse factor is raised to specularExp before use.
bool poweredDiffuse : 1;
// Cleared when the light's (or the fixed material's) color for that component is zero.
bool ambient : 1;
bool diffuse : 1;
bool specular : 1;
};
} lights[4];
// Material colors, used when the matching colorFor* flag below is not set.
// diffuse/specular are only filled in when at least one light uses that component.
struct {
Vec4<int> ambientColorFactor;
Vec4<int> diffuseColorFactor;
Vec4<int> specularColorFactor;
} material;
// Global ambient color (gstate.getAmbientRGBA()), as a factor.
Vec4<int> baseAmbientColorFactor;
// Material specular coefficient, cleaned up like spotExp.  Only set when some light still has diffuse or specular.
float specularExp;
struct {
// When set, the vertex color replaces the material color for that component.
bool colorForAmbient : 1;
bool colorForDiffuse : 1;
bool colorForSpecular : 1;
// At most one of these is set, and neither is set when no light has specular:
// setColor1 writes specular to color1, addColor1 adds it into color0.
bool setColor1 : 1;
bool addColor1 : 1;
};
};
void ComputeState(State *state, bool hasColor0);
void GenerateLightST(VertexData &vertex, const WorldCoords &worldnormal);
void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, bool hasColor);
void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, const State &state);
}

View file

@ -143,7 +143,7 @@ ScreenCoords TransformUnit::DrawingToScreen(const DrawingCoords &coords, u16 z)
return ret;
}
VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_flag) {
VertexData TransformUnit::ReadVertex(VertexReader &vreader, const Lighting::State &lstate, bool &outside_range_flag) {
PROFILE_THIS_SCOPE("read_vert");
VertexData vertex;
@ -265,7 +265,7 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
PROFILE_THIS_SCOPE("light");
if (gstate.isLightingEnabled())
Lighting::Process(vertex, worldpos, worldnormal, vreader.hasColor0());
Lighting::Process(vertex, worldpos, worldnormal, lstate);
} else {
vertex.screenpos.x = (int)(pos[0] * 16) + gstate.getOffsetX16();
vertex.screenpos.y = (int)(pos[1] * 16) + gstate.getOffsetY16();
@ -337,6 +337,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
binner_->UpdateState();
Lighting::State lstate;
if (gstate.isLightingEnabled())
ComputeState(&lstate, vreader.hasColor0());
bool outside_range_flag = false;
switch (prim_type) {
case GE_PRIM_POINTS:
@ -350,7 +354,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
vreader.Goto(vtx);
}
data[data_index++] = ReadVertex(vreader, outside_range_flag);
data[data_index++] = ReadVertex(vreader, lstate, outside_range_flag);
if (data_index < vtcs_per_prim) {
// Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
continue;
@ -401,7 +405,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
vreader.Goto(vtx);
}
data[data_index++] = ReadVertex(vreader, outside_range_flag);
data[data_index++] = ReadVertex(vreader, lstate, outside_range_flag);
if (outside_range_flag) {
outside_range_flag = false;
// Note: this is the post increment index. If odd, we set the first vert.
@ -447,7 +451,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
vreader.Goto(vtx);
}
data[(data_index++) & 1] = ReadVertex(vreader, outside_range_flag);
data[(data_index++) & 1] = ReadVertex(vreader, lstate, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
@ -480,7 +484,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
else {
vreader.Goto(vtx);
}
data[vtx] = ReadVertex(vreader, outside_range_flag);
data[vtx] = ReadVertex(vreader, lstate, outside_range_flag);
}
// If a strip is effectively a rectangle, draw it as such!
@ -499,7 +503,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
}
int provoking_index = (data_index++) % 3;
data[provoking_index] = ReadVertex(vreader, outside_range_flag);
data[provoking_index] = ReadVertex(vreader, lstate, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
@ -540,7 +544,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
} else {
vreader.Goto(0);
}
data[0] = ReadVertex(vreader, outside_range_flag);
data[0] = ReadVertex(vreader, lstate, outside_range_flag);
data_index++;
start_vtx = 1;
@ -556,7 +560,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
} else {
vreader.Goto(vtx);
}
data[vtx] = ReadVertex(vreader, outside_range_flag);
data[vtx] = ReadVertex(vreader, lstate, outside_range_flag);
}
int tl = -1, br = -1;
@ -575,7 +579,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
}
int provoking_index = 2 - ((data_index++) % 2);
data[provoking_index] = ReadVertex(vreader, outside_range_flag);
data[provoking_index] = ReadVertex(vreader, lstate, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;

View file

@ -34,6 +34,10 @@ typedef Vec4<float> ClipCoords; // Range: -w <= x/y/z <= w
struct SplinePatch;
class BinManager;
// Forward declaration only: this header just passes Lighting::State by
// reference, so the full definition does not need to be visible here.
namespace Lighting {
struct State;
}
struct ScreenCoords
{
ScreenCoords() {}
@ -126,7 +130,7 @@ public:
void GetStats(char *buffer, size_t bufsize);
private:
VertexData ReadVertex(VertexReader &vreader, bool &outside_range_flag);
VertexData ReadVertex(VertexReader &vreader, const Lighting::State &lstate, bool &outside_range_flag);
u8 *decoded_ = nullptr;
BinManager *binner_ = nullptr;