diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index b3a68bd937..98cd0e059f 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -51,6 +51,7 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) { renderWidthFactor_ = (float)renderWidth / 480.0f; renderHeightFactor_ = (float)renderHeight / 272.0f; + shaderManager_ = &shaderManager; // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { @@ -299,8 +300,7 @@ void GLES_GPU::DrawBezier(int ucount, int vcount) } } - LinkedShader *linkedShader = shaderManager.ApplyShader(); - TransformAndDrawPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, 3 * 3 * 6, linkedShader, customUV, GE_VTYPE_IDX_16BIT); + TransformAndDrawPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, 3 * 3 * 6, customUV, GE_VTYPE_IDX_16BIT); } @@ -365,14 +365,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) }; DEBUG_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); - LinkedShader *linkedShader = shaderManager.ApplyShader(); // TODO: Split this so that we can collect sequences of primitives, can greatly speed things up // on platforms where draw calls are expensive like mobile and D3D void *verts = Memory::GetPointer(gstate_c.vertexAddr); void *inds = 0; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) inds = Memory::GetPointer(gstate_c.indexAddr); - TransformAndDrawPrim(verts, inds, type, count, linkedShader, 0, -1); + TransformAndDrawPrim(verts, inds, type, count, 0, -1); } break; @@ -1033,7 +1032,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_PROJMATRIXDATA: DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data)); { - int num = gstate.projmtxnum & 0xF; + int num = gstate.projmtxnum & 0xF; gstate.projMatrix[num++] = getFloat24(data); gstate.projmtxnum = (gstate.projmtxnum & 0xFF000000) | (num & 0xF); } @@ -1049,21 +1048,24 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data)); { int num = gstate.texmtxnum & 0xF; - gstate.tgenMatrix[num++] = getFloat24(data); + if (num < 12) + gstate.tgenMatrix[num++] = getFloat24(data); gstate.texmtxnum = (gstate.texmtxnum & 0xFF000000) | (num & 0xF); } break; case GE_CMD_BONEMATRIXNUMBER: DEBUG_LOG(G3D,"DL BONE matrix #%i", data); - gstate.boneMatrixNumber &= 0xFF000007F; + gstate.boneMatrixNumber &= 0xFF00007F; break; case GE_CMD_BONEMATRIXDATA: DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber & 0x7f, getFloat24(data)); { int num = gstate.boneMatrixNumber & 0x7F; - gstate.boneMatrix[num++] = getFloat24(data); + if (num < 96) { + gstate.boneMatrix[num++] = getFloat24(data); + } gstate.boneMatrixNumber = (gstate.boneMatrixNumber & 0xFF000000) | (num & 0x7F); } break; diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 7bdf1af1d7..7eb4c8a46c 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -48,10 +48,13 @@ public: private: // TransformPipeline.cpp - void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType); + void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType); + void UpdateViewportAndProjection(); void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); bool ProcessDLQueue(); + + ShaderManager *shaderManager_; bool interruptsEnabled_; u32 displayFramebufPtr_; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 01a0917bdf..5def745e84 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -111,8 +111,19 @@ void LinkedShader::use() { else { glUniformMatrix4fv(u_proj, 1, GL_FALSE, gstate.projMatrix); - } + float flippedMatrix[16]; + memcpy(flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); + if (gstate_c.vpHeight < 0) { + flippedMatrix[5] = -flippedMatrix[5]; + flippedMatrix[13] = -flippedMatrix[13]; + } + if (gstate_c.vpWidth < 0) { + flippedMatrix[0] = -flippedMatrix[0]; + flippedMatrix[12] = -flippedMatrix[12]; + } + glUniformMatrix4fv(u_proj, 1, GL_FALSE, flippedMatrix); + } } if (u_texenv != -1 && dirtyUniforms & DIRTY_TEXENV) { glUniform4f(u_texenv, 1.0, 1.0, 1.0, 1.0); // TODO diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 96a799df1e..27c49edd5a 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -196,7 +196,7 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ // primitives correctly. Other primitives are possible to transform and light in hardware // using vertex shader, which will be way, way faster, especially on mobile. This has // not yet been implemented though. -void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType) +void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType) { int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing @@ -217,7 +217,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; - + bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; // Then, transform and draw in one big swoop (urgh!) // need to move this to the shader. @@ -312,54 +312,43 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte // Perform lighting here if enabled. don't need to check through, it's checked above. float dots[4] = {0,0,0,0}; - if (program->a_color0 != -1) - { - float unlitColor[4]; - for (int j = 0; j < 4; j++) { - unlitColor[j] = decoded[index].color[j] / 255.0f; - } - float litColor0[4]; - float litColor1[4]; - lighter.Light(litColor0, litColor1, unlitColor, out, norm, dots); + float unlitColor[4]; + for (int j = 0; j < 4; j++) { + unlitColor[j] = decoded[index].color[j] / 255.0f; + } + float litColor0[4]; + float litColor1[4]; + lighter.Light(litColor0, litColor1, unlitColor, out, norm, dots); - if (gstate.lightingEnable & 1) - { - // TODO: don't ignore gstate.lmode - we should send two colors in that case - if (gstate.lmode & 1) { - // Separate colors - for (int j = 0; j < 4; j++) { - c0[j] = litColor0[j]; - c1[j] = litColor1[j]; - } - } else { - // Summed color into c0 - for (int j = 0; j < 4; j++) { - c0[j] = litColor0[j] + litColor1[j]; - c1[j] = 0.0f; - } + if (gstate.lightingEnable & 1) + { + // TODO: don't ignore gstate.lmode - we should send two colors in that case + if (gstate.lmode & 1) { + // Separate colors + for (int j = 0; j < 4; j++) { + c0[j] = litColor0[j]; + c1[j] = litColor1[j]; } - } - else - { - if(dec.hasColor()) { - for (int j = 0; j < 4; j++) { - c0[j] = unlitColor[j]; - c1[j] = 0.0f; - } - } else { - c0[0] = (gstate.materialambient & 0xFF) / 255.f; - c0[1] = ((gstate.materialambient >> 8) & 0xFF) / 255.f; - c0[2] = ((gstate.materialambient >> 16) & 0xFF) / 255.f; - c0[3] = (gstate.materialalpha & 0xFF) / 255.f; + } else { + // Summed color into c0 + for (int j = 0; j < 4; j++) { + c0[j] = litColor0[j] + litColor1[j]; + c1[j] = 0.0f; } } } else { - // no color in the fragment program??? - for (int j = 0; j < 4; j++) { - c0[j] = decoded[index].color[j] / 255.0f; - c1[j] = 0.0f; + if(dec.hasColor()) { + for (int j = 0; j < 4; j++) { + c0[j] = unlitColor[j]; + c1[j] = 0.0f; + } + } else { + c0[0] = (gstate.materialambient & 0xFF) / 255.f; + c0[1] = ((gstate.materialambient >> 8) & 0xFF) / 255.f; + c0[2] = ((gstate.materialambient >> 16) & 0xFF) / 255.f; + c0[3] = (gstate.materialalpha & 0xFF) / 255.f; } } @@ -623,57 +612,8 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte glstate.depthRange.set(gstate_c.zOff - gstate_c.zScale, gstate_c.zOff + gstate_c.zScale); - - // Debugging code to mess around with the viewport -#if 1 - // We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1 - int regionX1 = gstate.region1 & 0x3FF; - int regionY1 = (gstate.region1 >> 10) & 0x3FF; - int regionX2 = (gstate.region2 & 0x3FF) + 1; - int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; - - float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f; - float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f; - - if (throughmode) { - // No viewport transform here. Let's experiment with using region. - glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_); - } else { - // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. - float vpXa = getFloat24(gstate.viewportx1); - float vpXb = getFloat24(gstate.viewportx2); - float vpYa = getFloat24(gstate.viewporty1); - float vpYb = getFloat24(gstate.viewporty2); - float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range - float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f - - // The viewport transform appears to go like this: - // Xscreen = -offsetX + vpXb + vpXa * Xview - // Yscreen = -offsetY + vpYb + vpYa * Yview - // Zscreen = vpZb + vpZa * Zview - - // This means that to get the analogue glViewport we must: - float vpX0 = vpXb - offsetX - vpXa; - float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y - float vpWidth = vpXa * 2; - float vpHeight = -vpYa * 2; - - // TODO: These two should feed into glDepthRange somehow. - float vpZ0 = (vpZb - vpZa) / 65536.0f; - float vpZ1 = (vpZa * 2) / 65536.0f; - - vpX0 *= renderWidthFactor_; - vpY0 *= renderHeightFactor_; - vpWidth *= renderWidthFactor_; - vpHeight *= renderHeightFactor_; - - // Flip vpY0 to match the OpenGL coordinate system. - vpY0 = renderHeight_ - (vpY0 + vpHeight); - glViewport(vpX0, vpY0, vpWidth, vpHeight); - // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh. - } - -#endif + UpdateViewportAndProjection(); + LinkedShader *program = shaderManager_->ApplyShader(); // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. glEnableVertexAttribArray(program->a_position); @@ -696,3 +636,62 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0); if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } + +void GLES_GPU::UpdateViewportAndProjection() +{ + bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; + + // We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1 + int regionX1 = gstate.region1 & 0x3FF; + int regionY1 = (gstate.region1 >> 10) & 0x3FF; + int regionX2 = (gstate.region2 & 0x3FF) + 1; + int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; + + float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f; + float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f; + + if (throughmode) { + return; + // No viewport transform here. Let's experiment with using region. + glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_); + } else { + // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. + float vpXa = getFloat24(gstate.viewportx1); + float vpXb = getFloat24(gstate.viewportx2); + float vpYa = getFloat24(gstate.viewporty1); + float vpYb = getFloat24(gstate.viewporty2); + float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range + float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f + + // The viewport transform appears to go like this: + // Xscreen = -offsetX + vpXb + vpXa * Xview + // Yscreen = -offsetY + vpYb + vpYa * Yview + // Zscreen = vpZb + vpZa * Zview + + // This means that to get the analogue glViewport we must: + float vpX0 = vpXb - offsetX - vpXa; + float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y + gstate_c.vpWidth = vpXa * 2; + gstate_c.vpHeight = -vpYa * 2; + + return; + + float vpWidth = fabsf(gstate_c.vpWidth); + float vpHeight = fabsf(gstate_c.vpHeight); + + // TODO: These two should feed into glDepthRange somehow. + float vpZ0 = (vpZb - vpZa) / 65536.0f; + float vpZ1 = (vpZa * 2) / 65536.0f; + + vpX0 *= renderWidthFactor_; + vpY0 *= renderWidthFactor_; + vpWidth *= renderWidthFactor_; + vpHeight *= renderWidthFactor_; + + // Flip vpY0 to match the OpenGL coordinate system. + vpY0 = renderHeight_ - (vpY0 + vpHeight); + glViewport(vpX0, vpY0, vpWidth, vpHeight); + // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh. + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + } +} \ No newline at end of file diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 167aab7fc0..194a3cb4b7 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -283,8 +283,12 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const } switch (nrm) { - case 0: - //no normals + case GE_VTYPE_NRM_8BIT: + { + const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += (sv[j]/127.0f) * multiplier; + } break; case GE_VTYPE_NRM_FLOAT >> 5: @@ -302,10 +306,6 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const normal[j] += (sv[j]/32767.0f) * multiplier; } break; - - default: - DEBUG_LOG(G3D,"Unknown normal format %i",nrm); - break; } } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index cc464f377c..de4d7e5849 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -279,6 +279,9 @@ struct GPUStateCache u32 curTextureWidth; u32 curTextureHeight; + + float vpWidth; + float vpHeight; }; // TODO: Implement support for these.