mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Improve SSE usage in software transform.
The SSE usage in software transform is actually already pretty decent (unlike the softgpu), but there were a few places where it could use a bit of help. This speeds things up when hardware transform is off, and in areas that need to use software transform.
This commit is contained in:
parent
416df17088
commit
678237aa6c
3 changed files with 22 additions and 32 deletions
|
@ -67,8 +67,14 @@ struct TransformedVertex
|
||||||
{
|
{
|
||||||
float x, y, z, fog; // in case of morph, preblend during decode
|
float x, y, z, fog; // in case of morph, preblend during decode
|
||||||
float u; float v; float w; // scaled by uscale, vscale, if there
|
float u; float v; float w; // scaled by uscale, vscale, if there
|
||||||
u8 color0[4]; // prelit
|
union {
|
||||||
u8 color1[4]; // prelit
|
u8 color0[4]; // prelit
|
||||||
|
u32 color0_32;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
u8 color1[4]; // prelit
|
||||||
|
u32 color1_32;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
|
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
|
||||||
|
|
|
@ -318,8 +318,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||||
reader.Goto(index);
|
reader.Goto(index);
|
||||||
|
|
||||||
float v[3] = {0, 0, 0};
|
float v[3] = {0, 0, 0};
|
||||||
float c0[4] = {1, 1, 1, 1};
|
Vec4f c0 = Vec4f(1, 1, 1, 1);
|
||||||
float c1[4] = {0, 0, 0, 0};
|
Vec4f c1 = Vec4f(0, 0, 0, 0);
|
||||||
float uv[3] = {0, 0, 1};
|
float uv[3] = {0, 0, 1};
|
||||||
float fogCoef = 1.0f;
|
float fogCoef = 1.0f;
|
||||||
|
|
||||||
|
@ -327,15 +327,10 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||||
// Do not touch the coordinates or the colors. No lighting.
|
// Do not touch the coordinates or the colors. No lighting.
|
||||||
reader.ReadPos(v);
|
reader.ReadPos(v);
|
||||||
if (reader.hasColor0()) {
|
if (reader.hasColor0()) {
|
||||||
reader.ReadColor0(c0);
|
reader.ReadColor0(&c0.x);
|
||||||
for (int j = 0; j < 4; j++) {
|
// c1 is already 0.
|
||||||
c1[j] = 0.0f;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
c0[0] = gstate.getMaterialAmbientR() / 255.f;
|
c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
|
||||||
c0[1] = gstate.getMaterialAmbientG() / 255.f;
|
|
||||||
c0[2] = gstate.getMaterialAmbientB() / 255.f;
|
|
||||||
c0[3] = gstate.getMaterialAmbientA() / 255.f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reader.hasUV()) {
|
if (reader.hasUV()) {
|
||||||
|
@ -389,18 +384,15 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform lighting here if enabled. don't need to check through, it's checked above.
|
// Perform lighting here if enabled. don't need to check through, it's checked above.
|
||||||
float unlitColor[4] = {1, 1, 1, 1};
|
Vec4f unlitColor = Vec4f(1, 1, 1, 1);
|
||||||
if (reader.hasColor0()) {
|
if (reader.hasColor0()) {
|
||||||
reader.ReadColor0(unlitColor);
|
reader.ReadColor0(&unlitColor.x);
|
||||||
} else {
|
} else {
|
||||||
unlitColor[0] = gstate.getMaterialAmbientR() / 255.f;
|
unlitColor = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
|
||||||
unlitColor[1] = gstate.getMaterialAmbientG() / 255.f;
|
|
||||||
unlitColor[2] = gstate.getMaterialAmbientB() / 255.f;
|
|
||||||
unlitColor[3] = gstate.getMaterialAmbientA() / 255.f;
|
|
||||||
}
|
}
|
||||||
float litColor0[4];
|
float litColor0[4];
|
||||||
float litColor1[4];
|
float litColor1[4];
|
||||||
lighter.Light(litColor0, litColor1, unlitColor, out, normal);
|
lighter.Light(litColor0, litColor1, unlitColor.AsArray(), out, normal);
|
||||||
|
|
||||||
if (gstate.isLightingEnabled()) {
|
if (gstate.isLightingEnabled()) {
|
||||||
// Don't ignore gstate.lmode - we should send two colors in that case
|
// Don't ignore gstate.lmode - we should send two colors in that case
|
||||||
|
@ -424,15 +416,10 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||||
c0[j] = unlitColor[j];
|
c0[j] = unlitColor[j];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
c0[0] = gstate.getMaterialAmbientR() / 255.f;
|
c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
|
||||||
c0[1] = gstate.getMaterialAmbientG() / 255.f;
|
|
||||||
c0[2] = gstate.getMaterialAmbientB() / 255.f;
|
|
||||||
c0[3] = gstate.getMaterialAmbientA() / 255.f;
|
|
||||||
}
|
}
|
||||||
if (lmode) {
|
if (lmode) {
|
||||||
for (int j = 0; j < 4; j++) {
|
// c1 is already 0.
|
||||||
c1[j] = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -528,12 +515,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||||
if (gstate_c.flipTexture) {
|
if (gstate_c.flipTexture) {
|
||||||
transformed[index].v = 1.0f - transformed[index].v;
|
transformed[index].v = 1.0f - transformed[index].v;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < 4; i++) {
|
transformed[index].color0_32 = c0.ToRGBA();
|
||||||
transformed[index].color0[i] = c0[i] * 255.0f;
|
transformed[index].color1_32 = c1.ToRGBA();
|
||||||
}
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
transformed[index].color1[i] = c1[i] * 255.0f;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Here's the best opportunity to try to detect rectangles used to clear the screen, and
|
// Here's the best opportunity to try to detect rectangles used to clear the screen, and
|
||||||
|
|
|
@ -331,6 +331,7 @@ struct GPUgstate
|
||||||
unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }
|
unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }
|
||||||
unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }
|
unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }
|
||||||
unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; }
|
unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; }
|
||||||
|
// Packs the material ambient color into one value: the low 24 bits of
// materialambient (R in bits 0-7, G in 8-15, B in 16-23, matching the
// per-channel getters) with materialalpha shifted into bits 24 and up.
// NOTE(review): assumes unsigned int is 32 bits wide, so only the low byte
// of materialalpha survives the shift - confirm for all target platforms.
unsigned int getMaterialAmbientRGBA() const {
	const unsigned int alphaBits = materialalpha << 24;
	const unsigned int colorBits = materialambient & 0x00FFFFFF;
	return colorBits | alphaBits;
}
|
||||||
unsigned int getMaterialDiffuseR() const { return materialdiffuse&0xFF; }
|
unsigned int getMaterialDiffuseR() const { return materialdiffuse&0xFF; }
|
||||||
unsigned int getMaterialDiffuseG() const { return (materialdiffuse>>8)&0xFF; }
|
unsigned int getMaterialDiffuseG() const { return (materialdiffuse>>8)&0xFF; }
|
||||||
unsigned int getMaterialDiffuseB() const { return (materialdiffuse>>16)&0xFF; }
|
unsigned int getMaterialDiffuseB() const { return (materialdiffuse>>16)&0xFF; }
|
||||||
|
|
Loading…
Add table
Reference in a new issue