Temporarily disable most of the viewport code as it broke some games. Kept the flip detection.

This commit is contained in:
Henrik Rydgard 2012-11-26 20:38:26 +01:00
parent d8c92caad9
commit 2822ce2737
6 changed files with 128 additions and 110 deletions

View file

@ -51,6 +51,7 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight)
{
renderWidthFactor_ = (float)renderWidth / 480.0f;
renderHeightFactor_ = (float)renderHeight / 272.0f;
shaderManager_ = &shaderManager;
// Sanity check gstate
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
@ -299,8 +300,7 @@ void GLES_GPU::DrawBezier(int ucount, int vcount)
}
}
LinkedShader *linkedShader = shaderManager.ApplyShader();
TransformAndDrawPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, 3 * 3 * 6, linkedShader, customUV, GE_VTYPE_IDX_16BIT);
TransformAndDrawPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, 3 * 3 * 6, customUV, GE_VTYPE_IDX_16BIT);
}
@ -365,14 +365,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
};
DEBUG_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
LinkedShader *linkedShader = shaderManager.ApplyShader();
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
// on platforms where draw calls are expensive like mobile and D3D
void *verts = Memory::GetPointer(gstate_c.vertexAddr);
void *inds = 0;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE)
inds = Memory::GetPointer(gstate_c.indexAddr);
TransformAndDrawPrim(verts, inds, type, count, linkedShader, 0, -1);
TransformAndDrawPrim(verts, inds, type, count, 0, -1);
}
break;
@ -1033,7 +1032,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
case GE_CMD_PROJMATRIXDATA:
DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data));
{
int num = gstate.projmtxnum & 0xF;
int num = gstate.projmtxnum & 0xF;
gstate.projMatrix[num++] = getFloat24(data);
gstate.projmtxnum = (gstate.projmtxnum & 0xFF000000) | (num & 0xF);
}
@ -1049,21 +1048,24 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data));
{
int num = gstate.texmtxnum & 0xF;
gstate.tgenMatrix[num++] = getFloat24(data);
if (num < 12)
gstate.tgenMatrix[num++] = getFloat24(data);
gstate.texmtxnum = (gstate.texmtxnum & 0xFF000000) | (num & 0xF);
}
break;
case GE_CMD_BONEMATRIXNUMBER:
DEBUG_LOG(G3D,"DL BONE matrix #%i", data);
gstate.boneMatrixNumber &= 0xFF000007F;
gstate.boneMatrixNumber &= 0xFF00007F;
break;
case GE_CMD_BONEMATRIXDATA:
DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber & 0x7f, getFloat24(data));
{
int num = gstate.boneMatrixNumber & 0x7F;
gstate.boneMatrix[num++] = getFloat24(data);
if (num < 96) {
gstate.boneMatrix[num++] = getFloat24(data);
}
gstate.boneMatrixNumber = (gstate.boneMatrixNumber & 0xFF000000) | (num & 0x7F);
}
break;

View file

@ -48,10 +48,13 @@ public:
private:
// TransformPipeline.cpp
void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType);
void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType);
void UpdateViewportAndProjection();
void DrawBezier(int ucount, int vcount);
void DoBlockTransfer();
bool ProcessDLQueue();
ShaderManager *shaderManager_;
bool interruptsEnabled_;
u32 displayFramebufPtr_;

View file

@ -111,8 +111,19 @@ void LinkedShader::use() {
else
{
glUniformMatrix4fv(u_proj, 1, GL_FALSE, gstate.projMatrix);
}
float flippedMatrix[16];
memcpy(flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
if (gstate_c.vpHeight < 0) {
flippedMatrix[5] = -flippedMatrix[5];
flippedMatrix[13] = -flippedMatrix[13];
}
if (gstate_c.vpWidth < 0) {
flippedMatrix[0] = -flippedMatrix[0];
flippedMatrix[12] = -flippedMatrix[12];
}
glUniformMatrix4fv(u_proj, 1, GL_FALSE, flippedMatrix);
}
}
if (u_texenv != -1 && dirtyUniforms & DIRTY_TEXENV) {
glUniform4f(u_texenv, 1.0, 1.0, 1.0, 1.0); // TODO

View file

@ -196,7 +196,7 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
// primitives correctly. Other primitives are possible to transform and light in hardware
// using vertex shader, which will be way, way faster, especially on mobile. This has
// not yet been implemented though.
void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType)
void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType)
{
int indexLowerBound, indexUpperBound;
// First, decode the verts and apply morphing
@ -217,7 +217,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
}
gpuStats.numDrawCalls++;
gpuStats.numVertsTransformed += vertexCount;
bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0;
// Then, transform and draw in one big swoop (urgh!)
// need to move this to the shader.
@ -312,54 +312,43 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
// Perform lighting here if enabled. don't need to check through, it's checked above.
float dots[4] = {0,0,0,0};
if (program->a_color0 != -1)
{
float unlitColor[4];
for (int j = 0; j < 4; j++) {
unlitColor[j] = decoded[index].color[j] / 255.0f;
}
float litColor0[4];
float litColor1[4];
lighter.Light(litColor0, litColor1, unlitColor, out, norm, dots);
float unlitColor[4];
for (int j = 0; j < 4; j++) {
unlitColor[j] = decoded[index].color[j] / 255.0f;
}
float litColor0[4];
float litColor1[4];
lighter.Light(litColor0, litColor1, unlitColor, out, norm, dots);
if (gstate.lightingEnable & 1)
{
// TODO: don't ignore gstate.lmode - we should send two colors in that case
if (gstate.lmode & 1) {
// Separate colors
for (int j = 0; j < 4; j++) {
c0[j] = litColor0[j];
c1[j] = litColor1[j];
}
} else {
// Summed color into c0
for (int j = 0; j < 4; j++) {
c0[j] = litColor0[j] + litColor1[j];
c1[j] = 0.0f;
}
if (gstate.lightingEnable & 1)
{
// TODO: don't ignore gstate.lmode - we should send two colors in that case
if (gstate.lmode & 1) {
// Separate colors
for (int j = 0; j < 4; j++) {
c0[j] = litColor0[j];
c1[j] = litColor1[j];
}
}
else
{
if(dec.hasColor()) {
for (int j = 0; j < 4; j++) {
c0[j] = unlitColor[j];
c1[j] = 0.0f;
}
} else {
c0[0] = (gstate.materialambient & 0xFF) / 255.f;
c0[1] = ((gstate.materialambient >> 8) & 0xFF) / 255.f;
c0[2] = ((gstate.materialambient >> 16) & 0xFF) / 255.f;
c0[3] = (gstate.materialalpha & 0xFF) / 255.f;
} else {
// Summed color into c0
for (int j = 0; j < 4; j++) {
c0[j] = litColor0[j] + litColor1[j];
c1[j] = 0.0f;
}
}
}
else
{
// no color in the fragment program???
for (int j = 0; j < 4; j++) {
c0[j] = decoded[index].color[j] / 255.0f;
c1[j] = 0.0f;
if(dec.hasColor()) {
for (int j = 0; j < 4; j++) {
c0[j] = unlitColor[j];
c1[j] = 0.0f;
}
} else {
c0[0] = (gstate.materialambient & 0xFF) / 255.f;
c0[1] = ((gstate.materialambient >> 8) & 0xFF) / 255.f;
c0[2] = ((gstate.materialambient >> 16) & 0xFF) / 255.f;
c0[3] = (gstate.materialalpha & 0xFF) / 255.f;
}
}
@ -623,57 +612,8 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
glstate.depthRange.set(gstate_c.zOff - gstate_c.zScale, gstate_c.zOff + gstate_c.zScale);
// Debugging code to mess around with the viewport
#if 1
// We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1
int regionX1 = gstate.region1 & 0x3FF;
int regionY1 = (gstate.region1 >> 10) & 0x3FF;
int regionX2 = (gstate.region2 & 0x3FF) + 1;
int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1;
float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f;
float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f;
if (throughmode) {
// No viewport transform here. Let's experiment with using region.
glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_);
} else {
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
float vpXa = getFloat24(gstate.viewportx1);
float vpXb = getFloat24(gstate.viewportx2);
float vpYa = getFloat24(gstate.viewporty1);
float vpYb = getFloat24(gstate.viewporty2);
float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range
float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f
// The viewport transform appears to go like this:
// Xscreen = -offsetX + vpXb + vpXa * Xview
// Yscreen = -offsetY + vpYb + vpYa * Yview
// Zscreen = vpZb + vpZa * Zview
// This means that to get the analogue glViewport we must:
float vpX0 = vpXb - offsetX - vpXa;
float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y
float vpWidth = vpXa * 2;
float vpHeight = -vpYa * 2;
// TODO: These two should feed into glDepthRange somehow.
float vpZ0 = (vpZb - vpZa) / 65536.0f;
float vpZ1 = (vpZa * 2) / 65536.0f;
vpX0 *= renderWidthFactor_;
vpY0 *= renderHeightFactor_;
vpWidth *= renderWidthFactor_;
vpHeight *= renderHeightFactor_;
// Flip vpY0 to match the OpenGL coordinate system.
vpY0 = renderHeight_ - (vpY0 + vpHeight);
glViewport(vpX0, vpY0, vpWidth, vpHeight);
// Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh.
}
#endif
UpdateViewportAndProjection();
LinkedShader *program = shaderManager_->ApplyShader();
// TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log.
glEnableVertexAttribArray(program->a_position);
@ -696,3 +636,62 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0);
if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1);
}
// Reads the GE viewport registers and caches the signed viewport size in
// gstate_c.vpWidth / gstate_c.vpHeight. Only the sign detection is live right
// now: the actual glViewport setup is temporarily disabled through the early
// returns below because it broke some games. The disabled code is kept so it
// can be re-enabled later.
void GLES_GPU::UpdateViewportAndProjection()
{
	bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0;

	// We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1
	int regionX1 = gstate.region1 & 0x3FF;
	int regionY1 = (gstate.region1 >> 10) & 0x3FF;
	int regionX2 = (gstate.region2 & 0x3FF) + 1;
	int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1;

	float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f;
	float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f;

	if (throughmode) {
		// Temporarily disabled: through mode leaves the GL viewport untouched.
		return;

		// No viewport transform here. Let's experiment with using region.
		glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_);
	} else {
		// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
		float vpXa = getFloat24(gstate.viewportx1);
		float vpXb = getFloat24(gstate.viewportx2);
		float vpYa = getFloat24(gstate.viewporty1);
		float vpYb = getFloat24(gstate.viewporty2);
		float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range
		float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f

		// The viewport transform appears to go like this:
		// Xscreen = -offsetX + vpXb + vpXa * Xview
		// Yscreen = -offsetY + vpYb + vpYa * Yview
		// Zscreen = vpZb + vpZa * Zview

		// This means that to get the analogue glViewport we must:
		float vpX0 = vpXb - offsetX - vpXa;
		float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y

		// Keep the signed sizes: a negative value marks a flipped axis.
		gstate_c.vpWidth = vpXa * 2;
		gstate_c.vpHeight = -vpYa * 2;

		// Temporarily disabled: everything below is skipped, including the
		// glViewport call and the DIRTY_PROJMATRIX notification.
		return;

		float vpWidth = fabsf(gstate_c.vpWidth);
		float vpHeight = fabsf(gstate_c.vpHeight);

		// TODO: These two should feed into glDepthRange somehow.
		float vpZ0 = (vpZb - vpZa) / 65536.0f;
		float vpZ1 = (vpZa * 2) / 65536.0f;

		// Scale from PSP pixels to render-target pixels. The vertical values
		// must use the height factor, not the width factor.
		vpX0 *= renderWidthFactor_;
		vpY0 *= renderHeightFactor_;
		vpWidth *= renderWidthFactor_;
		vpHeight *= renderHeightFactor_;

		// Flip vpY0 to match the OpenGL coordinate system.
		vpY0 = renderHeight_ - (vpY0 + vpHeight);
		glViewport(vpX0, vpY0, vpWidth, vpHeight);
		// Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh.

		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}
}

View file

@ -283,8 +283,12 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
}
switch (nrm)
{
case 0:
//no normals
case GE_VTYPE_NRM_8BIT:
{
const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += (sv[j]/127.0f) * multiplier;
}
break;
case GE_VTYPE_NRM_FLOAT >> 5:
@ -302,10 +306,6 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
normal[j] += (sv[j]/32767.0f) * multiplier;
}
break;
default:
DEBUG_LOG(G3D,"Unknown normal format %i",nrm);
break;
}
}

View file

@ -279,6 +279,9 @@ struct GPUStateCache
u32 curTextureWidth;
u32 curTextureHeight;
float vpWidth;
float vpHeight;
};
// TODO: Implement support for these.