From 1b35964445601ef76f1970506b61d1ea663d9990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 26 Dec 2012 08:54:33 +0100 Subject: [PATCH] Add support for rotated UVs of flipped RECTANGLES. Avoid using indexed draws when unnecessary. --- GPU/GLES/IndexGenerator.cpp | 57 +++++++++++---- GPU/GLES/IndexGenerator.h | 6 +- GPU/GLES/TransformPipeline.cpp | 129 +++++++++++++++++---------------- GPU/GLES/VertexDecoder.h | 2 +- 4 files changed, 111 insertions(+), 83 deletions(-) diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index 7f1da114c1..32cbcc78a9 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -28,10 +28,16 @@ const u8 indexedPrimitiveType[7] = { GE_PRIM_RECTANGLES, }; +enum { + SEEN_INDEX8 = 1 << 29, + SEEN_INDEX16 = 1 << 30 +}; + void IndexGenerator::Reset() { prim_ = -1; count_ = 0; index_ = 0; + seenPrims_ = 0; this->inds_ = indsBase_; } @@ -41,14 +47,12 @@ bool IndexGenerator::PrimCompatible(int prim) { return indexedPrimitiveType[prim] == prim_; } -void IndexGenerator::Setup(u16 *inds) -{ +void IndexGenerator::Setup(u16 *inds) { this->indsBase_ = inds; Reset(); } -void IndexGenerator::AddPoints(int numVerts) -{ +void IndexGenerator::AddPoints(int numVerts) { //if we have no vertices return for (int i = 0; i < numVerts; i++) { @@ -58,6 +62,7 @@ void IndexGenerator::AddPoints(int numVerts) index_ += numVerts; count_ += numVerts; prim_ = GE_PRIM_POINTS; + seenPrims_ |= 1 << GE_PRIM_POINTS; } void IndexGenerator::AddList(int numVerts) @@ -75,6 +80,7 @@ void IndexGenerator::AddList(int numVerts) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= 1 << GE_PRIM_TRIANGLES; } void IndexGenerator::AddStrip(int numVerts) @@ -91,6 +97,7 @@ void IndexGenerator::AddStrip(int numVerts) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP; } void IndexGenerator::AddFan(int numVerts) @@ -105,6 +112,7 @@ 
void IndexGenerator::AddFan(int numVerts) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN; } void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) @@ -116,6 +124,7 @@ void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) index_ += numVerts; count_ += numVerts; prim_ = GE_PRIM_POINTS; + seenPrims_ |= (1 << GE_PRIM_POINTS) | SEEN_INDEX8; } void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) @@ -127,6 +136,7 @@ void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) index_ += numVerts; count_ += numVerts; prim_ = GE_PRIM_POINTS; + seenPrims_ |= (1 << GE_PRIM_POINTS) | SEEN_INDEX16; } void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) @@ -141,6 +151,7 @@ void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | SEEN_INDEX8; } void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) @@ -157,6 +168,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | SEEN_INDEX8; } void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) @@ -172,6 +184,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | SEEN_INDEX8; } void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) @@ -186,6 +199,7 @@ void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | SEEN_INDEX16; } void
IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) @@ -202,6 +216,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | SEEN_INDEX16; } void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) @@ -217,6 +232,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) index_ += numVerts; count_ += numTris * 3; prim_ = GE_PRIM_TRIANGLES; + seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | SEEN_INDEX16; } //Lines @@ -231,6 +247,7 @@ void IndexGenerator::AddLineList(int numVerts) index_ += numVerts; count_ += numLines * 2; prim_ = GE_PRIM_LINES; + seenPrims_ |= 1 << prim_; } void IndexGenerator::AddLineStrip(int numVerts) @@ -244,6 +261,7 @@ void IndexGenerator::AddLineStrip(int numVerts) index_ += numVerts; count_ += numLines * 2; prim_ = GE_PRIM_LINES; + seenPrims_ |= 1 << GE_PRIM_LINE_STRIP; } void IndexGenerator::AddRectangles(int numVerts) @@ -257,6 +275,7 @@ void IndexGenerator::AddRectangles(int numVerts) index_ += numVerts; count_ += numRects * 2; prim_ = GE_PRIM_RECTANGLES; + seenPrims_ |= 1 << GE_PRIM_RECTANGLES; } void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) @@ -264,12 +283,13 @@ void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) int numLines = numVerts / 2; for (int i = 0; i < numLines; i++) { - *inds_++ = index_ + i*2; - *inds_++ = index_ + i*2+1; + *inds_++ = index_ + offset + inds[i*2]; + *inds_++ = index_ + offset + inds[i*2+1]; } index_ += numVerts; count_ += numLines * 2; prim_ = GE_PRIM_LINES; + seenPrims_ |= (1 << GE_PRIM_LINES) | SEEN_INDEX8; } void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) @@ -277,12 +297,13 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset int numLines = numVerts - 1; for (int i = 0; i <
numLines; i++) { - *inds_++ = index_ + i; - *inds_++ = index_ + i + 1; + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; } index_ += numVerts; count_ += numLines * 2; prim_ = GE_PRIM_LINES; + seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | SEEN_INDEX8; } void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) @@ -290,12 +311,13 @@ void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset int numLines = numVerts / 2; for (int i = 0; i < numLines; i++) { - *inds_++ = index_ + i*2; - *inds_++ = index_ + i*2+1; + *inds_++ = index_ + offset + inds[i*2]; + *inds_++ = index_ + offset + inds[i*2+1]; } index_ += numVerts; count_ += numLines * 2; prim_ = GE_PRIM_LINES; + seenPrims_ |= (1 << GE_PRIM_LINES) | SEEN_INDEX16; } void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) @@ -303,12 +325,13 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offse int numLines = numVerts - 1; for (int i = 0; i < numLines; i++) { - *inds_++ = index_ + i; - *inds_++ = index_ + i + 1; + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; } index_ += numVerts; count_ += numLines * 2; prim_ = GE_PRIM_LINES; + seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | SEEN_INDEX16; } void IndexGenerator::TranslateRectangles(int numVerts, const u8 *inds, int offset) @@ -316,12 +339,13 @@ void IndexGenerator::TranslateRectangles(int numVerts, const u8 *inds, int offse int numRects = numVerts / 2; for (int i = 0; i < numRects; i++) { - *inds_++ = index_ + i*2; - *inds_++ = index_ + i*2+1; + *inds_++ = index_ + offset + inds[i*2]; + *inds_++ = index_ + offset + inds[i*2+1]; } index_ += numVerts; count_ += numRects * 2; prim_ = GE_PRIM_RECTANGLES; + seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | SEEN_INDEX8; } void IndexGenerator::TranslateRectangles(int numVerts, const u16 *inds, int offset) @@ -329,10 +353,11 @@ void IndexGenerator::TranslateRectangles(int numVerts, const u16 *inds, int offs int
numRects = numVerts / 2; for (int i = 0; i < numRects; i++) { - *inds_++ = index_ + i*2; - *inds_++ = index_ + i*2+1; + *inds_++ = index_ + offset + inds[i*2]; + *inds_++ = index_ + offset + inds[i*2+1]; } index_ += numVerts; count_ += numRects * 2; prim_ = GE_PRIM_RECTANGLES; + seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | SEEN_INDEX16; } diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index 2ed1d03d5a..68cc4d8a74 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -58,13 +58,14 @@ public: void TranslateList(int numVerts, const u16 *inds, int offset); void TranslateStrip(int numVerts, const u16 *inds, int offset); void TranslateFan(int numVerts, const u16 *inds, int offset); - - int MaxIndex() { return index_; } + + int MaxIndex() { return index_; } int VertexCount() { return count_; } bool Empty() { return index_ == 0; } void SetIndex(int ind) { index_ = ind; } + int SeenPrims() const { return seenPrims_; } private: u16 *indsBase_; @@ -72,5 +73,6 @@ private: int index_; int count_; int prim_; + int seenPrims_; }; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 63df7e64b6..8935422d0a 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -283,6 +283,27 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF VertexAttribDisable(program->a_position, decFmt.posfmt); } +// The verts are in the order: BR BL TL TR +static void SwapUVs(TransformedVertex &a, TransformedVertex &b) { + float tempu = a.u; + float tempv = a.v; + a.u = b.u; + a.v = b.v; + b.u = tempu; + b.v = tempv; +} +// 2 3 3 2 0 3 2 1 +// to to or +// 1 0 0 1 1 2 3 0 + +static void RotateUVs(TransformedVertex v[4]) { + if (v[0].y < v[2].y && v[0].x > v[2].x) { + SwapUVs(v[0], v[2]); + } else if (v[0].y > v[2].y && v[0].x < v[2].x) { + SwapUVs(v[1], v[3]); + } +} + // This is the software transform pipeline, which is necessary for supporting RECT // primitives correctly, and may be easier to use
for debugging than the hardware // transform pipeline. @@ -299,8 +320,7 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. void TransformDrawEngine::SoftwareTransformAndDraw( - int prim, u8 *decoded, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) -{ + int prim, u8 *decoded, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) { /* DEBUG_LOG(G3D, "View matrix:"); const float *m = &gstate.viewMatrix[0]; @@ -334,8 +354,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( float c1[4] = {0, 0, 0, 0}; float uv[2] = {0, 0}; - if (throughmode) - { + if (throughmode) { // Do not touch the coordinates or the colors. No lighting. reader.ReadPos(v); if (reader.hasColor0()) { @@ -343,9 +362,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( for (int j = 0; j < 4; j++) { c1[j] = 0.0f; } - } - else - { + } else { c0[0] = (gstate.materialambient & 0xFF) / 255.f; c0[1] = ((gstate.materialambient >> 8) & 0xFF) / 255.f; c0[2] = ((gstate.materialambient >> 16) & 0xFF) / 255.f; @@ -356,9 +373,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( reader.ReadUV(uv); } // Scale UV? 
- } - else - { + } else { // We do software T&L for now float out[3], norm[3]; float pos[3], nrm[3] = {0}; @@ -366,17 +381,14 @@ void TransformDrawEngine::SoftwareTransformAndDraw( if (reader.hasNormal()) reader.ReadNrm(nrm); - if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) - { + if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) { Vec3ByMatrix43(out, pos, gstate.worldMatrix); if (reader.hasNormal()) { Norm3ByMatrix43(norm, nrm, gstate.worldMatrix); } else { memset(norm, 0, 12); } - } - else - { + } else { float weights[8]; reader.ReadWeights(weights); // Skinning @@ -419,8 +431,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( float litColor1[4]; lighter.Light(litColor0, litColor1, unlitColor, out, norm, dots); - if (gstate.lightingEnable & 1) - { + if (gstate.lightingEnable & 1) { // Don't ignore gstate.lmode - we should send two colors in that case if (gstate.lmode & 1) { // Separate colors @@ -435,9 +446,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( c1[j] = 0.0f; } } - } - else - { + } else { if (reader.hasColor0()) { for (int j = 0; j < 4; j++) { c0[j] = unlitColor[j]; @@ -507,7 +516,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( // TODO: Write to a flexible buffer, we don't always need all four components. memcpy(&transformed[index].x, v, 3 * sizeof(float)); - memcpy(&transformed[index].uv, uv, 2 * sizeof(float)); + memcpy(&transformed[index].u, uv, 2 * sizeof(float)); memcpy(&transformed[index].color0, c0, 4 * sizeof(float)); memcpy(&transformed[index].color1, c1, 3 * sizeof(float)); } @@ -540,43 +549,35 @@ void TransformDrawEngine::SoftwareTransformAndDraw( { // We have to turn the rectangle into two triangles, so 6 points. Sigh. - // TODO: there's supposed to be extra magic here to rotate the UV coordinates depending on if upside down etc. 
- // bottom right - *trans = transVtx; - trans++; - - // top left - *trans = transVtx; - trans->x = saved.x; - trans->uv[0] = saved.uv[0]; - trans->y = saved.y; - trans->uv[1] = saved.uv[1]; - trans++; - - // top right - *trans = transVtx; - trans->x = saved.x; - trans->uv[0] = saved.uv[0]; - trans++; + trans[0] = transVtx; // bottom left - *trans = transVtx; - trans->y = saved.y; - trans->uv[1] = saved.uv[1]; - trans++; - - // bottom right - *trans = transVtx; - trans->x = saved.x; - trans->uv[0] = saved.uv[0]; - trans->y = saved.y; - trans->uv[1] = saved.uv[1]; - trans++; + trans[1] = transVtx; + trans[1].y = saved.y; + trans[1].v = saved.v; // top left - *trans = transVtx; - trans++; + trans[2] = transVtx; + trans[2].x = saved.x; + trans[2].y = saved.y; + trans[2].u = saved.u; + trans[2].v = saved.v; + + // top right + trans[3] = transVtx; + trans[3].x = saved.x; + trans[3].u = saved.u; + + // That's the four corners. Now process UV rotation. + RotateUVs(trans); + + // bottom right + trans[4] = trans[0]; + + // top left + trans[5] = trans[2]; + trans += 6; numTrans += 6; } @@ -605,8 +606,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } -void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) -{ +void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { // For the future if (!indexGen.PrimCompatible(prim)) Flush(); @@ -628,7 +628,6 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert // Decode the verts and apply morphing dec.DecodeVerts(decoded + numVerts * (int)dec.GetDecVtxFmt().stride, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); numVerts += indexUpperBound - indexLowerBound + 1; - if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); @@ -676,19 
+675,13 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert void TransformDrawEngine::Flush() { if (indexGen.Empty()) return; - // From here on out, the index type is ALWAYS 16-bit. Deal with it. - - // And here we should return, having collected the morphed but untransformed vertices. - // Note that DecodeVerts should convert strips into indexed lists etc, adding to our - // current vertex buffer and index buffer. - - // The rest below here should only execute on Flush. #if 0 for (int i = indexLowerBound; i <= indexUpperBound; i++) { PrintDecodedVertex(decoded[i], gstate.vertType); } #endif + // Check if anything needs updating if (gstate_c.textureChanged) { if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) { @@ -712,7 +705,15 @@ void TransformDrawEngine::Flush() { if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); - glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); + // If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS, + // there is no need for the index buffer we built. We can then use glDrawArrays instead + // for a very minor speed boost. 
+ int seen = indexGen.SeenPrims() | 0x83204820; + if (seen == (1 << GE_PRIM_TRIANGLES) || seen == (1 << GE_PRIM_LINES) || seen == (1 << GE_PRIM_POINTS)) { + glDrawArrays(glprim[prim], 0, indexGen.VertexCount()); + } else { + glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); + } DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { SoftwareTransformAndDraw(prim, decoded, program, indexGen.VertexCount(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(), diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index d41e5f1fd2..ef34677eb1 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -54,7 +54,7 @@ struct DecVtxFormat { struct TransformedVertex { float x, y, z; // in case of morph, preblend during decode - float uv[2]; // scaled by uscale, vscale, if there + float u; float v; // scaled by uscale, vscale, if there float color0[4]; // prelit float color1[3]; // prelit };