Improve SSE usage in software transform.

The software transform's SSE usage is actually already pretty decent (unlike
the softgpu's), but there were a few places where it could use a bit of help.
This speeds things up when hardware transform is off, and in areas that need
to use software transform.
This commit is contained in:
Unknown W. Brackets 2014-03-17 23:05:48 -07:00
parent 416df17088
commit 678237aa6c
3 changed files with 22 additions and 32 deletions

View file

@ -67,8 +67,14 @@ struct TransformedVertex
{ {
float x, y, z, fog; // in case of morph, preblend during decode float x, y, z, fog; // in case of morph, preblend during decode
float u; float v; float w; // scaled by uscale, vscale, if there float u; float v; float w; // scaled by uscale, vscale, if there
u8 color0[4]; // prelit union {
u8 color1[4]; // prelit u8 color0[4]; // prelit
u32 color0_32;
};
union {
u8 color1[4]; // prelit
u32 color1_32;
};
}; };
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound); void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);

View file

@ -318,8 +318,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
reader.Goto(index); reader.Goto(index);
float v[3] = {0, 0, 0}; float v[3] = {0, 0, 0};
float c0[4] = {1, 1, 1, 1}; Vec4f c0 = Vec4f(1, 1, 1, 1);
float c1[4] = {0, 0, 0, 0}; Vec4f c1 = Vec4f(0, 0, 0, 0);
float uv[3] = {0, 0, 1}; float uv[3] = {0, 0, 1};
float fogCoef = 1.0f; float fogCoef = 1.0f;
@ -327,15 +327,10 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
// Do not touch the coordinates or the colors. No lighting. // Do not touch the coordinates or the colors. No lighting.
reader.ReadPos(v); reader.ReadPos(v);
if (reader.hasColor0()) { if (reader.hasColor0()) {
reader.ReadColor0(c0); reader.ReadColor0(&c0.x);
for (int j = 0; j < 4; j++) { // c1 is already 0.
c1[j] = 0.0f;
}
} else { } else {
c0[0] = gstate.getMaterialAmbientR() / 255.f; c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
c0[1] = gstate.getMaterialAmbientG() / 255.f;
c0[2] = gstate.getMaterialAmbientB() / 255.f;
c0[3] = gstate.getMaterialAmbientA() / 255.f;
} }
if (reader.hasUV()) { if (reader.hasUV()) {
@ -389,18 +384,15 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
} }
// Perform lighting here if enabled. don't need to check through, it's checked above. // Perform lighting here if enabled. don't need to check through, it's checked above.
float unlitColor[4] = {1, 1, 1, 1}; Vec4f unlitColor = Vec4f(1, 1, 1, 1);
if (reader.hasColor0()) { if (reader.hasColor0()) {
reader.ReadColor0(unlitColor); reader.ReadColor0(&unlitColor.x);
} else { } else {
unlitColor[0] = gstate.getMaterialAmbientR() / 255.f; unlitColor = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
unlitColor[1] = gstate.getMaterialAmbientG() / 255.f;
unlitColor[2] = gstate.getMaterialAmbientB() / 255.f;
unlitColor[3] = gstate.getMaterialAmbientA() / 255.f;
} }
float litColor0[4]; float litColor0[4];
float litColor1[4]; float litColor1[4];
lighter.Light(litColor0, litColor1, unlitColor, out, normal); lighter.Light(litColor0, litColor1, unlitColor.AsArray(), out, normal);
if (gstate.isLightingEnabled()) { if (gstate.isLightingEnabled()) {
// Don't ignore gstate.lmode - we should send two colors in that case // Don't ignore gstate.lmode - we should send two colors in that case
@ -424,15 +416,10 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
c0[j] = unlitColor[j]; c0[j] = unlitColor[j];
} }
} else { } else {
c0[0] = gstate.getMaterialAmbientR() / 255.f; c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
c0[1] = gstate.getMaterialAmbientG() / 255.f;
c0[2] = gstate.getMaterialAmbientB() / 255.f;
c0[3] = gstate.getMaterialAmbientA() / 255.f;
} }
if (lmode) { if (lmode) {
for (int j = 0; j < 4; j++) { // c1 is already 0.
c1[j] = 0.0f;
}
} }
} }
@ -528,12 +515,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
if (gstate_c.flipTexture) { if (gstate_c.flipTexture) {
transformed[index].v = 1.0f - transformed[index].v; transformed[index].v = 1.0f - transformed[index].v;
} }
for (int i = 0; i < 4; i++) { transformed[index].color0_32 = c0.ToRGBA();
transformed[index].color0[i] = c0[i] * 255.0f; transformed[index].color1_32 = c1.ToRGBA();
}
for (int i = 0; i < 3; i++) {
transformed[index].color1[i] = c1[i] * 255.0f;
}
} }
// Here's the best opportunity to try to detect rectangles used to clear the screen, and // Here's the best opportunity to try to detect rectangles used to clear the screen, and

View file

@ -331,6 +331,7 @@ struct GPUgstate
unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; } unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }
unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; } unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }
unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; } unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; }
unsigned int getMaterialAmbientRGBA() const { return (materialambient & 0x00FFFFFF) | (materialalpha << 24); }
unsigned int getMaterialDiffuseR() const { return materialdiffuse&0xFF; } unsigned int getMaterialDiffuseR() const { return materialdiffuse&0xFF; }
unsigned int getMaterialDiffuseG() const { return (materialdiffuse>>8)&0xFF; } unsigned int getMaterialDiffuseG() const { return (materialdiffuse>>8)&0xFF; }
unsigned int getMaterialDiffuseB() const { return (materialdiffuse>>16)&0xFF; } unsigned int getMaterialDiffuseB() const { return (materialdiffuse>>16)&0xFF; }