mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #15284 from unknownbrackets/softgpu-opt
Improve softgpu lighting accuracy and speed
This commit is contained in:
commit
eee62849fe
4 changed files with 228 additions and 113 deletions
155
GPU/Math3D.h
155
GPU/Math3D.h
|
@ -38,6 +38,12 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER))
|
||||
#define MATH3D_CALL __vectorcall
|
||||
#else
|
||||
#define MATH3D_CALL
|
||||
#endif
|
||||
|
||||
namespace Math3D {
|
||||
|
||||
// Helper for Vec classes to clamp values.
|
||||
|
@ -876,33 +882,45 @@ float vectorGetByIndex(__m128 v) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// v and vecOut must point to different memory.
|
||||
inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12]) {
|
||||
#if defined(_M_SSE)
|
||||
// x, y, and z should be broadcast. Should only be used through Vec3f version.
|
||||
inline __m128 MATH3D_CALL Vec3ByMatrix43Internal(__m128 x, __m128 y, __m128 z, const float m[12]) {
|
||||
__m128 col0 = _mm_loadu_ps(m);
|
||||
__m128 col1 = _mm_loadu_ps(m + 3);
|
||||
__m128 col2 = _mm_loadu_ps(m + 6);
|
||||
__m128 col3 = _mm_loadu_ps(m + 9);
|
||||
__m128 x = _mm_set1_ps(v[0]);
|
||||
__m128 y = _mm_set1_ps(v[1]);
|
||||
__m128 z = _mm_set1_ps(v[2]);
|
||||
__m128 sum = _mm_add_ps(
|
||||
_mm_add_ps(_mm_mul_ps(col0, x), _mm_mul_ps(col1, y)),
|
||||
_mm_add_ps(_mm_mul_ps(col2, z), col3));
|
||||
return sum;
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
inline float32x4_t Vec3ByMatrix43Internal(float32x4_t vec, const float m[16]) {
|
||||
float32x4_t col0 = vld1q_f32(m);
|
||||
float32x4_t col1 = vld1q_f32(m + 3);
|
||||
float32x4_t col2 = vld1q_f32(m + 6);
|
||||
float32x4_t col3 = vld1q_f32(m + 9);
|
||||
float32x4_t sum = vaddq_f32(
|
||||
vaddq_f32(vmulq_laneq_f32(col0, vec, 0), vmulq_laneq_f32(col1, vec, 1)),
|
||||
vaddq_f32(vmulq_laneq_f32(col2, vec, 2), col3));
|
||||
return sum;
|
||||
}
|
||||
#endif
|
||||
|
||||
// v and vecOut must point to different memory.
|
||||
inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12]) {
|
||||
#if defined(_M_SSE)
|
||||
__m128 x = _mm_set1_ps(v[0]);
|
||||
__m128 y = _mm_set1_ps(v[1]);
|
||||
__m128 z = _mm_set1_ps(v[2]);
|
||||
__m128 sum = Vec3ByMatrix43Internal(x, y, z, m);
|
||||
// Not sure what the best way to store 3 elements is. Ideally, we should
|
||||
// probably store all four.
|
||||
vecOut[0] = _mm_cvtss_f32(sum);
|
||||
vecOut[1] = vectorGetByIndex<1>(sum);
|
||||
vecOut[2] = vectorGetByIndex<2>(sum);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
float32x4_t col0 = vld1q_f32(m);
|
||||
float32x4_t col1 = vld1q_f32(m + 3);
|
||||
float32x4_t col2 = vld1q_f32(m + 6);
|
||||
float32x4_t col3 = vld1q_f32(m + 9);
|
||||
float32x4_t vec = vld1q_f32(v);
|
||||
float32x4_t sum = vaddq_f32(
|
||||
vaddq_f32(vmulq_laneq_f32(col0, vec, 0), vmulq_laneq_f32(col1, vec, 1)),
|
||||
vaddq_f32(vmulq_laneq_f32(col2, vec, 2), col3));
|
||||
float32x4_t sum = Vec3ByMatrix43Internal(vld1q_f32(v), m);
|
||||
vecOut[0] = vgetq_lane_f32(sum, 0);
|
||||
vecOut[1] = vgetq_lane_f32(sum, 1);
|
||||
vecOut[2] = vgetq_lane_f32(sum, 2);
|
||||
|
@ -913,29 +931,55 @@ inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
|
|||
#endif
|
||||
}
|
||||
|
||||
inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
|
||||
{
|
||||
inline Vec3f MATH3D_CALL Vec3ByMatrix43(const Vec3f v, const float m[12]) {
|
||||
#if defined(_M_SSE)
|
||||
__m128 x = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 y = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 z = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
return Vec3ByMatrix43Internal(x, y, z, m);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
return Vec3ByMatrix43Internal(v.vec, m);
|
||||
#else
|
||||
Vec3f vecOut;
|
||||
Vec3ByMatrix43(vecOut.AsArray(), v.AsArray(), m);
|
||||
return vecOut;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_M_SSE)
|
||||
// x, y, and z should be broadcast. Should only be used through Vec3f version.
|
||||
inline __m128 MATH3D_CALL Vec3ByMatrix44Internal(__m128 x, __m128 y, __m128 z, const float m[16]) {
|
||||
__m128 col0 = _mm_loadu_ps(m);
|
||||
__m128 col1 = _mm_loadu_ps(m + 4);
|
||||
__m128 col2 = _mm_loadu_ps(m + 8);
|
||||
__m128 col3 = _mm_loadu_ps(m + 12);
|
||||
__m128 x = _mm_set1_ps(v[0]);
|
||||
__m128 y = _mm_set1_ps(v[1]);
|
||||
__m128 z = _mm_set1_ps(v[2]);
|
||||
__m128 sum = _mm_add_ps(
|
||||
_mm_add_ps(_mm_mul_ps(col0, x), _mm_mul_ps(col1, y)),
|
||||
_mm_add_ps(_mm_mul_ps(col2, z), col3));
|
||||
_mm_storeu_ps(vecOut, sum);
|
||||
return sum;
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
inline float32x4_t Vec3ByMatrix44Internal(float32x4_t vec, const float m[16]) {
|
||||
float32x4_t col0 = vld1q_f32(m);
|
||||
float32x4_t col1 = vld1q_f32(m + 4);
|
||||
float32x4_t col2 = vld1q_f32(m + 8);
|
||||
float32x4_t col3 = vld1q_f32(m + 12);
|
||||
float32x4_t vec = vld1q_f32(v);
|
||||
float32x4_t sum = vaddq_f32(
|
||||
vaddq_f32(vmulq_laneq_f32(col0, vec, 0), vmulq_laneq_f32(col1, vec, 1)),
|
||||
vaddq_f32(vmulq_laneq_f32(col2, vec, 2), col3));
|
||||
return sum;
|
||||
}
|
||||
#endif
|
||||
|
||||
inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16]) {
|
||||
#if defined(_M_SSE)
|
||||
__m128 x = _mm_set1_ps(v[0]);
|
||||
__m128 y = _mm_set1_ps(v[1]);
|
||||
__m128 z = _mm_set1_ps(v[2]);
|
||||
__m128 sum = Vec3ByMatrix44Internal(x, y, z, m);
|
||||
_mm_storeu_ps(vecOut, sum);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
float32x4_t sum = Vec3ByMatrix44Internal(vld1q_f32(v), m);
|
||||
vst1q_f32(vecOut, sum);
|
||||
#else
|
||||
vecOut[0] = v[0] * m[0] + v[1] * m[4] + v[2] * m[8] + m[12];
|
||||
|
@ -945,11 +989,78 @@ inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
|
|||
#endif
|
||||
}
|
||||
|
||||
inline void Norm3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
|
||||
{
|
||||
inline Vec4f MATH3D_CALL Vec3ByMatrix44(const Vec3f v, const float m[16]) {
|
||||
#if defined(_M_SSE)
|
||||
__m128 x = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 y = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 z = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
return Vec3ByMatrix44Internal(x, y, z, m);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
return Vec3ByMatrix44Internal(v.vec, m);
|
||||
#else
|
||||
Vec4f vecOut;
|
||||
Vec3ByMatrix44(vecOut.AsArray(), v.AsArray(), m);
|
||||
return vecOut;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_M_SSE)
|
||||
// x, y, and z should be broadcast. Should only be used through Vec3f version.
|
||||
inline __m128 MATH3D_CALL Norm3ByMatrix43Internal(__m128 x, __m128 y, __m128 z, const float m[12]) {
|
||||
__m128 col0 = _mm_loadu_ps(m);
|
||||
__m128 col1 = _mm_loadu_ps(m + 3);
|
||||
__m128 col2 = _mm_loadu_ps(m + 6);
|
||||
__m128 sum = _mm_add_ps(
|
||||
_mm_add_ps(_mm_mul_ps(col0, x), _mm_mul_ps(col1, y)),
|
||||
_mm_mul_ps(col2, z));
|
||||
return sum;
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
inline float32x4_t Norm3ByMatrix43Internal(float32x4_t vec, const float m[16]) {
|
||||
float32x4_t col0 = vld1q_f32(m);
|
||||
float32x4_t col1 = vld1q_f32(m + 3);
|
||||
float32x4_t col2 = vld1q_f32(m + 6);
|
||||
float32x4_t sum = vaddq_f32(
|
||||
vaddq_f32(vmulq_laneq_f32(col0, vec, 0), vmulq_laneq_f32(col1, vec, 1)),
|
||||
vmulq_laneq_f32(col2, vec, 2));
|
||||
return sum;
|
||||
}
|
||||
#endif
|
||||
|
||||
inline void Norm3ByMatrix43(float vecOut[3], const float v[3], const float m[12]) {
|
||||
#if defined(_M_SSE)
|
||||
__m128 x = _mm_set1_ps(v[0]);
|
||||
__m128 y = _mm_set1_ps(v[1]);
|
||||
__m128 z = _mm_set1_ps(v[2]);
|
||||
__m128 sum = Norm3ByMatrix43Internal(x, y, z, m);
|
||||
vecOut[0] = _mm_cvtss_f32(sum);
|
||||
vecOut[1] = vectorGetByIndex<1>(sum);
|
||||
vecOut[2] = vectorGetByIndex<2>(sum);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
float32x4_t sum = Norm3ByMatrix43Internal(vld1q_f32(v), m);
|
||||
vecOut[0] = vgetq_lane_f32(sum, 0);
|
||||
vecOut[1] = vgetq_lane_f32(sum, 1);
|
||||
vecOut[2] = vgetq_lane_f32(sum, 2);
|
||||
#else
|
||||
vecOut[0] = v[0] * m[0] + v[1] * m[3] + v[2] * m[6];
|
||||
vecOut[1] = v[0] * m[1] + v[1] * m[4] + v[2] * m[7];
|
||||
vecOut[2] = v[0] * m[2] + v[1] * m[5] + v[2] * m[8];
|
||||
#endif
|
||||
}
|
||||
|
||||
inline Vec3f MATH3D_CALL Norm3ByMatrix43(const Vec3f v, const float m[12]) {
|
||||
#if defined(_M_SSE)
|
||||
__m128 x = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 y = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 z = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
return Norm3ByMatrix43Internal(x, y, z, m);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
return Norm3ByMatrix43Internal(v.vec, m);
|
||||
#else
|
||||
Vec3f vecOut;
|
||||
Norm3ByMatrix43(vecOut.AsArray(), v.AsArray(), m);
|
||||
return vecOut;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void Matrix4ByMatrix4(float out[16], const float a[16], const float b[16]) {
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "ppsspp_config.h"
|
||||
#include <cmath>
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Software/Lighting.h"
|
||||
|
@ -33,7 +34,7 @@ static inline Vec3f GetLightVec(u32 lparams[12], int light) {
|
|||
}
|
||||
|
||||
static inline float pspLightPow(float v, float e) {
|
||||
if (e <= 0.0f) {
|
||||
if (e <= 0.0f || (std::isnan(e) && std::signbit(e))) {
|
||||
return 1.0f;
|
||||
}
|
||||
if (v > 0.0f) {
|
||||
|
@ -43,35 +44,35 @@ static inline float pspLightPow(float v, float e) {
|
|||
return v;
|
||||
}
|
||||
|
||||
static inline float GenerateLightCoord(VertexData &vertex, int light) {
|
||||
// TODO: Should specular lighting should affect this, too? Doesn't in GLES.
|
||||
Vec3<float> L = GetLightVec(gstate.lpos, light);
|
||||
// In other words, L.Length2() == 0.0f means Dot({0, 0, 1}, worldnormal).
|
||||
float diffuse_factor = Dot(L.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal);
|
||||
|
||||
return (diffuse_factor + 1.0f) / 2.0f;
|
||||
}
|
||||
|
||||
void GenerateLightST(VertexData &vertex) {
|
||||
// Always calculate texture coords from lighting results if environment mapping is active
|
||||
// This should be done even if lighting is disabled altogether.
|
||||
vertex.texturecoords.s() = GenerateLightCoord(vertex, gstate.getUVLS0());
|
||||
vertex.texturecoords.t() = GenerateLightCoord(vertex, gstate.getUVLS1());
|
||||
}
|
||||
|
||||
void Process(VertexData& vertex, bool hasColor) {
|
||||
const int materialupdate = gstate.materialupdate & (hasColor ? 7 : 0);
|
||||
|
||||
Vec3<float> vcol0 = vertex.color0.rgb().Cast<float>() * Vec3<float>::AssignToAll(1.0f / 255.0f);
|
||||
Vec3<float> mec = Vec3<float>::FromRGB(gstate.getMaterialEmissive());
|
||||
Vec4<int> mec = Vec4<int>::FromRGBA(gstate.getMaterialEmissive());
|
||||
|
||||
Vec3<float> mac = (materialupdate & 1) ? vcol0 : Vec3<float>::FromRGB(gstate.getMaterialAmbientRGBA());
|
||||
Vec3<float> final_color = mec + mac * Vec3<float>::FromRGB(gstate.getAmbientRGBA());
|
||||
Vec3<float> specular_color(0.0f, 0.0f, 0.0f);
|
||||
Vec4<int> mac = (materialupdate & 1) ? vertex.color0 : Vec4<int>::FromRGBA(gstate.getMaterialAmbientRGBA());
|
||||
Vec4<int> ac = Vec4<int>::FromRGBA(gstate.getAmbientRGBA());
|
||||
// Ambient (whether vertex or material) rounds using the half offset method (like alpha blend.)
|
||||
const Vec4<int> ones = Vec4<int>::AssignToAll(1);
|
||||
Vec4<int> ambient = ((mac * 2 + ones) * (ac * 2 + ones)) / 1024;
|
||||
|
||||
for (unsigned int light = 0; light < 4; ++light) {
|
||||
// Always calculate texture coords from lighting results if environment mapping is active
|
||||
// TODO: Should specular lighting should affect this, too? Doesn't in GLES.
|
||||
// This should be done even if lighting is disabled altogether.
|
||||
if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
|
||||
Vec3<float> L = GetLightVec(gstate.lpos, light);
|
||||
// In other words, L.Length2() == 0.0f means Dot({0, 0, 1}, worldnormal).
|
||||
float diffuse_factor = Dot(L.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal);
|
||||
|
||||
if (gstate.getUVLS0() == (int)light)
|
||||
vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f;
|
||||
|
||||
if (gstate.getUVLS1() == (int)light)
|
||||
vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f;
|
||||
}
|
||||
}
|
||||
|
||||
if (!gstate.isLightingEnabled())
|
||||
return;
|
||||
Vec4<int> final_color = mec + ambient;
|
||||
Vec4<int> specular_color = Vec4<int>::AssignToAll(0);
|
||||
|
||||
for (unsigned int light = 0; light < 4; ++light) {
|
||||
if (!gstate.isLightChanEnabled(light))
|
||||
|
@ -86,70 +87,83 @@ void Process(VertexData& vertex, bool hasColor) {
|
|||
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
|
||||
float d = L.NormalizeOr001();
|
||||
|
||||
float att = 1.f;
|
||||
float att = 1.0f;
|
||||
if (!gstate.isDirectionalLight(light)) {
|
||||
att = 1.f / Dot(GetLightVec(gstate.latt, light), Vec3f(1.0f, d, d * d));
|
||||
if (att > 1.f) att = 1.f;
|
||||
if (att < 0.f) att = 0.f;
|
||||
att = 1.0f / Dot(GetLightVec(gstate.latt, light), Vec3f(1.0f, d, d * d));
|
||||
if (!(att > 0.0f))
|
||||
att = 0.0f;
|
||||
else if (att > 1.0f)
|
||||
att = 1.0f;
|
||||
}
|
||||
|
||||
float spot = 1.f;
|
||||
float spot = 1.0f;
|
||||
if (gstate.isSpotLight(light)) {
|
||||
Vec3<float> dir = GetLightVec(gstate.ldir, light);
|
||||
float rawSpot = Dot(dir.NormalizedOr001(cpu_info.bSSE4_1), L);
|
||||
float rawSpot = Dot(dir.Normalized(cpu_info.bSSE4_1), L);
|
||||
if (std::isnan(rawSpot))
|
||||
rawSpot = std::signbit(rawSpot) ? 0.0f : 1.0f;
|
||||
float cutoff = getFloat24(gstate.lcutoff[light]);
|
||||
if (std::isnan(cutoff) && std::signbit(cutoff))
|
||||
cutoff = 0.0f;
|
||||
if (rawSpot >= cutoff) {
|
||||
float conv = getFloat24(gstate.lconv[light]);
|
||||
spot = pspLightPow(rawSpot, conv);
|
||||
if (std::isnan(spot))
|
||||
spot = 0.0f;
|
||||
} else {
|
||||
spot = 0.f;
|
||||
spot = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
// ambient lighting
|
||||
Vec3<float> lac = Vec3<float>::FromRGB(gstate.getLightAmbientColor(light));
|
||||
final_color += lac * mac * att * spot;
|
||||
int attspot = (int)ceilf(256 * 2 * att * spot + 1);
|
||||
if (attspot > 512)
|
||||
attspot = 512;
|
||||
Vec4<int> lac = Vec4<int>::FromRGBA(gstate.getLightAmbientColor(light));
|
||||
Vec4<int> lambient = ((mac * 2 + ones) * (lac * 2 + ones) * attspot) / (1024 * 512);
|
||||
final_color += lambient;
|
||||
|
||||
// diffuse lighting
|
||||
Vec3<float> ldc = Vec3<float>::FromRGB(gstate.getDiffuseColor(light));
|
||||
Vec3<float> mdc = (materialupdate & 2) ? vcol0 : Vec3<float>::FromRGB(gstate.getMaterialDiffuse());
|
||||
|
||||
float diffuse_factor = Dot(L, vertex.worldnormal);
|
||||
if (gstate.isUsingPoweredDiffuseLight(light)) {
|
||||
float k = gstate.getMaterialSpecularCoef();
|
||||
diffuse_factor = pspLightPow(diffuse_factor, k);
|
||||
}
|
||||
|
||||
if (diffuse_factor > 0.f) {
|
||||
final_color += ldc * mdc * diffuse_factor * att * spot;
|
||||
if (diffuse_factor > 0.0f) {
|
||||
int diffuse_attspot = (int)ceilf(256 * 2 * att * spot * diffuse_factor + 1);
|
||||
if (diffuse_attspot > 512)
|
||||
diffuse_attspot = 512;
|
||||
Vec4<int> ldc = Vec4<int>::FromRGBA(gstate.getDiffuseColor(light));
|
||||
Vec4<int> mdc = (materialupdate & 2) ? vertex.color0 : Vec4<int>::FromRGBA(gstate.getMaterialDiffuse());
|
||||
Vec4<int> ldiffuse = ((ldc * 2 + ones) * (mdc * 2 + ones) * diffuse_attspot) / (1024 * 512);
|
||||
final_color += ldiffuse;
|
||||
}
|
||||
|
||||
if (gstate.isUsingSpecularLight(light) && diffuse_factor >= 0.0f) {
|
||||
Vec3<float> H = L + Vec3<float>(0.f, 0.f, 1.f);
|
||||
|
||||
Vec3<float> lsc = Vec3<float>::FromRGB(gstate.getSpecularColor(light));
|
||||
Vec3<float> msc = (materialupdate & 4) ? vcol0 : Vec3<float>::FromRGB(gstate.getMaterialSpecular());
|
||||
|
||||
float specular_factor = Dot(H.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal);
|
||||
float k = gstate.getMaterialSpecularCoef();
|
||||
specular_factor = pspLightPow(specular_factor, k);
|
||||
|
||||
if (specular_factor > 0.f) {
|
||||
specular_color += lsc * msc * specular_factor * att * spot;
|
||||
if (specular_factor > 0.0f) {
|
||||
int specular_attspot = (int)ceilf(256 * 2 * att * spot * specular_factor + 1);
|
||||
if (specular_attspot > 512)
|
||||
specular_attspot = 512;
|
||||
Vec4<int> lsc = Vec4<int>::FromRGBA(gstate.getSpecularColor(light));
|
||||
Vec4<int> msc = (materialupdate & 4) ? vertex.color0 : Vec4<int>::FromRGBA(gstate.getMaterialSpecular());
|
||||
Vec4<int> lspecular = ((lsc * 2 + ones) * (msc * 2 + ones) * specular_attspot) / (1024 * 512);
|
||||
specular_color += lspecular;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int maa = (materialupdate & 1) ? vertex.color0.a() : gstate.getMaterialAmbientA();
|
||||
int final_alpha = (gstate.getAmbientA() * maa) / 255;
|
||||
|
||||
if (gstate.isUsingSecondaryColor()) {
|
||||
Vec3<int> final_color_int = (final_color.Clamp(0.0f, 1.0f) * 255.0f).Cast<int>();
|
||||
vertex.color0 = Vec4<int>(final_color_int, final_alpha);
|
||||
vertex.color1 = (specular_color.Clamp(0.0f, 1.0f) * 255.0f).Cast<int>();
|
||||
vertex.color0 = final_color.Clamp(0, 255);
|
||||
vertex.color1 = specular_color.Clamp(0, 255).rgb();
|
||||
} else {
|
||||
Vec3<int> final_color_int = ((final_color + specular_color).Clamp(0.0f, 1.0f) * 255.0f).Cast<int>();
|
||||
vertex.color0 = Vec4<int>(final_color_int, final_alpha);
|
||||
vertex.color0 = (final_color + specular_color).Clamp(0, 255);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
namespace Lighting {
|
||||
|
||||
void GenerateLightST(VertexData &vertex);
|
||||
void Process(VertexData& vertex, bool hasColor);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,29 +67,20 @@ VertexDecoder *SoftwareDrawEngine::FindVertexDecoder(u32 vtype) {
|
|||
return DrawEngineCommon::GetVertexDecoder(vertTypeID);
|
||||
}
|
||||
|
||||
WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords)
|
||||
{
|
||||
Mat3x3<float> world_matrix(gstate.worldMatrix);
|
||||
return WorldCoords(world_matrix * coords) + Vec3<float>(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
|
||||
WorldCoords TransformUnit::ModelToWorld(const ModelCoords &coords) {
|
||||
return Vec3ByMatrix43(coords, gstate.worldMatrix);
|
||||
}
|
||||
|
||||
WorldCoords TransformUnit::ModelToWorldNormal(const ModelCoords& coords)
|
||||
{
|
||||
Mat3x3<float> world_matrix(gstate.worldMatrix);
|
||||
return WorldCoords(world_matrix * coords);
|
||||
WorldCoords TransformUnit::ModelToWorldNormal(const ModelCoords &coords) {
|
||||
return Norm3ByMatrix43(coords, gstate.worldMatrix);
|
||||
}
|
||||
|
||||
ViewCoords TransformUnit::WorldToView(const WorldCoords& coords)
|
||||
{
|
||||
Mat3x3<float> view_matrix(gstate.viewMatrix);
|
||||
return ViewCoords(view_matrix * coords) + Vec3<float>(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]);
|
||||
ViewCoords TransformUnit::WorldToView(const WorldCoords &coords) {
|
||||
return Vec3ByMatrix43(coords, gstate.viewMatrix);
|
||||
}
|
||||
|
||||
ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords)
|
||||
{
|
||||
Vec4<float> coords4(coords.x, coords.y, coords.z, 1.0f);
|
||||
Mat4x4<float> projection_matrix(gstate.projMatrix);
|
||||
return ClipCoords(projection_matrix * coords4);
|
||||
ClipCoords TransformUnit::ViewToClip(const ViewCoords &coords) {
|
||||
return Vec3ByMatrix44(coords, gstate.projMatrix);
|
||||
}
|
||||
|
||||
static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool *outside_range_flag) {
|
||||
|
@ -161,20 +152,16 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
|
|||
PROFILE_THIS_SCOPE("read_vert");
|
||||
VertexData vertex;
|
||||
|
||||
float pos[3];
|
||||
ModelCoords pos;
|
||||
// VertexDecoder normally scales z, but we want it unscaled.
|
||||
vreader.ReadPosThroughZ16(pos);
|
||||
vreader.ReadPosThroughZ16(pos.AsArray());
|
||||
|
||||
if (!gstate.isModeClear() && gstate.isTextureMapEnabled() && vreader.hasUV()) {
|
||||
float uv[2];
|
||||
vreader.ReadUV(uv);
|
||||
vertex.texturecoords = Vec2<float>(uv[0], uv[1]);
|
||||
vreader.ReadUV(vertex.texturecoords.AsArray());
|
||||
}
|
||||
|
||||
if (vreader.hasNormal()) {
|
||||
float normal[3];
|
||||
vreader.ReadNrm(normal);
|
||||
vertex.normal = Vec3<float>(normal[0], normal[1], normal[2]);
|
||||
vreader.ReadNrm(vertex.normal.AsArray());
|
||||
|
||||
if (gstate.areNormalsReversed())
|
||||
vertex.normal = -vertex.normal;
|
||||
|
@ -188,15 +175,15 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
|
|||
Vec3<float> tmpnrm(0.f, 0.f, 0.f);
|
||||
|
||||
for (int i = 0; i < vertTypeGetNumBoneWeights(gstate.vertType); ++i) {
|
||||
Mat3x3<float> bone(&gstate.boneMatrix[12*i]);
|
||||
tmppos += (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3<float>(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])) * W[i];
|
||||
if (vreader.hasNormal())
|
||||
tmpnrm += (bone * vertex.normal) * W[i];
|
||||
Vec3<float> step = Vec3ByMatrix43(pos, gstate.boneMatrix + i * 12);
|
||||
tmppos += step * W[i];
|
||||
if (vreader.hasNormal()) {
|
||||
step = Norm3ByMatrix43(vertex.normal, gstate.boneMatrix + i * 12);
|
||||
tmpnrm += step * W[i];
|
||||
}
|
||||
}
|
||||
|
||||
pos[0] = tmppos.x;
|
||||
pos[1] = tmppos.y;
|
||||
pos[2] = tmppos.z;
|
||||
pos = tmppos;
|
||||
if (vreader.hasNormal())
|
||||
vertex.normal = tmpnrm;
|
||||
}
|
||||
|
@ -206,7 +193,7 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
|
|||
vreader.ReadColor0(col);
|
||||
vertex.color0 = Vec4<int>(col[0]*255, col[1]*255, col[2]*255, col[3]*255);
|
||||
} else {
|
||||
vertex.color0 = Vec4<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB(), gstate.getMaterialAmbientA());
|
||||
vertex.color0 = Vec4<int>::FromRGBA(gstate.getMaterialAmbientRGBA());
|
||||
}
|
||||
|
||||
if (vreader.hasColor1()) {
|
||||
|
@ -218,7 +205,7 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
|
|||
}
|
||||
|
||||
if (!gstate.isModeThrough()) {
|
||||
vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
|
||||
vertex.modelpos = pos;
|
||||
vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
|
||||
ModelCoords viewpos = TransformUnit::WorldToView(vertex.worldpos);
|
||||
vertex.clippos = ClipCoords(TransformUnit::ViewToClip(viewpos));
|
||||
|
@ -241,7 +228,7 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
|
|||
|
||||
if (vreader.hasNormal()) {
|
||||
vertex.worldnormal = TransformUnit::ModelToWorldNormal(vertex.normal);
|
||||
vertex.worldnormal /= vertex.worldnormal.Length();
|
||||
vertex.worldnormal.NormalizeOr001();
|
||||
} else {
|
||||
vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
|
||||
}
|
||||
|
@ -273,14 +260,16 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
|
|||
}
|
||||
|
||||
// TODO: What about uv scale and offset?
|
||||
Mat3x3<float> tgen(gstate.tgenMatrix);
|
||||
Vec3<float> stq = tgen * source + Vec3<float>(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]);
|
||||
Vec3<float> stq = Vec3ByMatrix43(source, gstate.tgenMatrix);
|
||||
float z_recip = 1.0f / stq.z;
|
||||
vertex.texturecoords = Vec2f(stq.x * z_recip, stq.y * z_recip);
|
||||
} else if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
|
||||
Lighting::GenerateLightST(vertex);
|
||||
}
|
||||
|
||||
PROFILE_THIS_SCOPE("light");
|
||||
Lighting::Process(vertex, vreader.hasColor0());
|
||||
if (gstate.isLightingEnabled())
|
||||
Lighting::Process(vertex, vreader.hasColor0());
|
||||
} else {
|
||||
vertex.screenpos.x = (int)(pos[0] * 16) + gstate.getOffsetX16();
|
||||
vertex.screenpos.y = (int)(pos[1] * 16) + gstate.getOffsetY16();
|
||||
|
|
Loading…
Add table
Reference in a new issue