From 4eeab8338951b7407ec5943fe4d62f7d0b4b7c5e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 17:50:22 +0100 Subject: [PATCH 1/8] Add IndexGenerator.cpp/h which will later be used to combine small draw calls into large indexed draw calls, for better performance. --- CMakeLists.txt | 2 + GPU/CMakeLists.txt | 1 + GPU/GLES/IndexGenerator.cpp | 232 +++++++++++++++++++++++++++++ GPU/GLES/IndexGenerator.h | 57 +++++++ GPU/GLES/VertexShaderGenerator.cpp | 5 - GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 + android/jni/Android.mk | 1 + 8 files changed, 301 insertions(+), 5 deletions(-) create mode 100644 GPU/GLES/IndexGenerator.cpp create mode 100644 GPU/GLES/IndexGenerator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 858bc710c4..adaf9dc6fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -786,6 +786,8 @@ add_library(GPU OBJECT GPU/GLES/FragmentShaderGenerator.h GPU/GLES/Framebuffer.cpp GPU/GLES/Framebuffer.h + GPU/GLES/IndexGenerator.cpp + GPU/GLES/IndexGenerator.h GPU/GLES/ShaderManager.cpp GPU/GLES/ShaderManager.h GPU/GLES/StateMapping.cpp diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 803200ae6d..ad56972231 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRCS GLES/DisplayListInterpreter.cpp GLES/FragmentShaderGenerator.cpp GLES/Framebuffer.cpp + GLES/IndexGenerator.cpp GLES/ShaderManager.cpp GLES/StateMapping.cpp GLES/TextureCache.cpp diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp new file mode 100644 index 0000000000..7a786488cf --- /dev/null +++ b/GPU/GLES/IndexGenerator.cpp @@ -0,0 +1,232 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "IndexGenerator.h" + +// Points don't need indexing... +const u8 indexedPrimitiveType[7] = { + GE_PRIM_POINTS, + GE_PRIM_LINES, + GE_PRIM_LINES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, +}; + +void IndexGenerator::Reset() { + prim_ = -1; + inds_ = 0; +} + +bool IndexGenerator::PrimCompatible(int prim) { + if (prim_ == -1) + return true; + return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; +} + +void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) +{ + this->inds_ = inds; + index_ = baseIndex; +} + +void IndexGenerator::AddList(int numVerts) +{ + //if we have no vertices return + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + i*3; + *inds_++ = index_ + i*3 + 1; + *inds_++ = index_ + i*3 + 2; + } + + // ignore overflow verts + index_ += numVerts; +} + +void IndexGenerator::AddStrip(int numVerts) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i+(wind?2:1); + *inds_++ = index_ + i+(wind?1:2); + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::AddFan(int numVerts) +{ + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_; + *inds_++ = index_ + i + 1; + *inds_++ = index_ + i + 2; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) +{ + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i*3]; + *inds_++ = index_ + offset + inds[i*3 + 1]; + *inds_++ = index_ + offset + inds[i*3 + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + (wind?2:1)]; + *inds_++ = index_ + offset + inds[i + (wind?1:2)]; + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) +{ + if (numVerts <= 0) return; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; + *inds_++ = index_ + offset + inds[i + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) +{ + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i*3]; + *inds_++ = index_ + offset + inds[i*3 + 1]; + *inds_++ = index_ + offset + inds[i*3 + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + (wind?2:1)]; + *inds_++ = index_ + offset + inds[i + (wind?1:2)]; + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) +{ + if (numVerts <= 0) return; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; + *inds_++ = index_ + offset + inds[i + 2]; + } + index_ += numVerts; +} + +//Lines +void IndexGenerator::AddLineList(int numVerts) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::AddLineStrip(int numVerts) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} \ No newline at end of file diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h new file mode 100644 index 0000000000..45d3a0bad3 --- /dev/null +++ b/GPU/GLES/IndexGenerator.h @@ -0,0 +1,57 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +#pragma once + +#include "CommonTypes.h" +#include "../ge_constants.h" + +class IndexGenerator +{ +public: + void Reset(); + void Start(u16 *indexptr, int baseIndex, int prim); + bool PrimCompatible(int prim); + + // Triangles + void AddList(int numVerts); + void AddStrip(int numVerts); + void AddFan(int numVerts); + // Lines + void AddLineList(int numVerts); + void AddLineStrip(int numVerts); + + // Translates already indexed lists + void TranslateLineList(int numVerts, const u8 *inds, int offset); + void TranslateLineStrip(int numVerts, const u8 *inds, int offset); + void TranslateLineList(int numVerts, const u16 *inds, int offset); + void TranslateLineStrip(int numVerts, const u16 *inds, int offset); + + void TranslateList(int numVerts, const u8 *inds, int offset); + void TranslateStrip(int numVerts, const u8 *inds, int offset); + void TranslateFan(int numVerts, const u8 *inds, int offset); + void TranslateList(int numVerts, const u16 *inds, int offset); + void TranslateStrip(int numVerts, const u16 *inds, int offset); + void TranslateFan(int numVerts, const u16 *inds, int offset); + +private: + u16 *inds_; + int index_; + int prim_; +}; + diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 395021ce56..953b5c684d 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -15,10 +15,6 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. -// TODO: We should transition from doing the transform in software, as seen in TransformPipeline.cpp, -// into doing the transform in the vertex shader - except for Rectangles, there we really need to do -// the transforms ourselves. - #include #if defined(_WIN32) && defined(_DEBUG) #include @@ -42,7 +38,6 @@ static char buffer[16384]; #define WRITE p+=sprintf - bool CanUseHardwareTransform(int prim) { if (!g_Config.bHardwareTransform) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 7b36bc7078..9b3228b826 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -120,6 +120,7 @@ + @@ -135,6 +136,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index aad3ad15c2..e5a783590f 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -57,6 +57,9 @@ GLES + + GLES + @@ -95,6 +98,9 @@ GLES + + GLES + diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 13a44e02f1..84ddbce292 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -72,6 +72,7 @@ LOCAL_SRC_FILES := \ $(SRC)/GPU/GLES/Framebuffer.cpp \ $(SRC)/GPU/GLES/DisplayListInterpreter.cpp \ $(SRC)/GPU/GLES/TextureCache.cpp \ + $(SRC)/GPU/GLES/IndexGenerator.cpp \ $(SRC)/GPU/GLES/TransformPipeline.cpp \ $(SRC)/GPU/GLES/StateMapping.cpp \ $(SRC)/GPU/GLES/VertexDecoder.cpp \ From 2e9daa5f89ebdb3892d6793eedbfb9107c5771f2 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 18:46:15 +0100 Subject: [PATCH 2/8] All drawing is now indexed lists, through IndexGenerator. --- GPU/GLES/IndexGenerator.cpp | 50 +++++++++++++ GPU/GLES/IndexGenerator.h | 8 +++ GPU/GLES/TransformPipeline.cpp | 126 ++++++++++++++++----------------- GPU/GLES/TransformPipeline.h | 2 +- GPU/GLES/VertexDecoder.cpp | 2 +- GPU/GLES/VertexDecoder.h | 1 + 6 files changed, 122 insertions(+), 67 deletions(-) diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index 7a786488cf..b08ab3cd5b 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -31,6 +31,7 @@ const u8 indexedPrimitiveType[7] = { void IndexGenerator::Reset() { prim_ = -1; inds_ = 0; + count_ = 0; } bool IndexGenerator::PrimCompatible(int prim) { @@ -41,8 +42,22 @@ bool IndexGenerator::PrimCompatible(int prim) { void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) { + count_ = 0; this->inds_ = inds; index_ = baseIndex; + prim_ = indexedPrimitiveType[prim]; +} + +void IndexGenerator::AddPoints(int numVerts) +{ + //if we have no vertices return + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + i; + } + // ignore overflow verts + index_ += numVerts; + count_ += numVerts; } void IndexGenerator::AddList(int numVerts) @@ -58,6 +73,7 @@ void IndexGenerator::AddList(int numVerts) // ignore overflow verts index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::AddStrip(int numVerts) @@ -72,6 +88,7 @@ void IndexGenerator::AddStrip(int numVerts) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::AddFan(int numVerts) @@ -84,6 +101,27 @@ void IndexGenerator::AddFan(int numVerts) *inds_++ = index_ + i + 2; } index_ += numVerts; + count_ += numTris * 3; +} + +void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) +{ + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + offset + inds[i]; + } + index_ += numVerts; + count_ += numVerts; +} + +void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) +{ + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + offset + inds[i]; + } + index_ += numVerts; + count_ += numVerts; } void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) @@ -96,6 +134,7 @@ void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + offset + inds[i*3 + 2]; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) @@ -110,6 +149,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) @@ -123,6 +163,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + offset + inds[i + 2]; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) @@ -135,6 +176,7 @@ void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) *inds_++ = index_ + offset + inds[i*3 + 2]; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) @@ -149,6 +191,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) @@ -162,6 +205,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) *inds_++ = index_ + offset + inds[i + 2]; } index_ += numVerts; + count_ += numTris * 3; } //Lines @@ -174,6 +218,7 @@ void IndexGenerator::AddLineList(int numVerts) *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::AddLineStrip(int numVerts) @@ -185,6 +230,7 @@ void IndexGenerator::AddLineStrip(int numVerts) *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) @@ -196,6 +242,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) @@ -207,6 +254,7 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) @@ -218,6 +266,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) @@ -229,4 +278,5 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offse *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; } \ No newline at end of file diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index 45d3a0bad3..bb2a9bde00 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -28,6 +28,8 @@ public: void Start(u16 *indexptr, int baseIndex, int prim); bool PrimCompatible(int prim); + // Points (why index these? code simplicity) + void AddPoints(int numVerts); // Triangles void AddList(int numVerts); void AddStrip(int numVerts); @@ -36,6 +38,8 @@ public: void AddLineList(int numVerts); void AddLineStrip(int numVerts); + void TranslatePoints(int numVerts, const u8 *inds, int offset); + void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); void TranslateLineStrip(int numVerts, const u8 *inds, int offset); @@ -49,9 +53,13 @@ public: void TranslateStrip(int numVerts, const u16 *inds, int offset); void TranslateFan(int numVerts, const u16 *inds, int offset); + int MaxIndex() { return index_; } + int VertexCount() { return count_; } + private: u16 *inds_; int index_; + int count_; int prim_; }; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 91a8548749..913f2c23ca 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -30,6 +30,7 @@ #include "VertexDecoder.h" #include "ShaderManager.h" #include "DisplayListInterpreter.h" +#include "IndexGenerator.h" const GLuint glprim[8] = { GL_POINTS, @@ -42,7 +43,9 @@ const GLuint glprim[8] = { }; u8 decoded[65536 * 32]; -// uint16_t decIndex[65536]; // Unused +uint16_t decIndex[65536]; // Unused + +IndexGenerator indexGen; TransformedVertex transformed[65536]; TransformedVertex transformedExpanded[65536]; @@ -262,7 +265,7 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. -void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV) +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV) { /* DEBUG_LOG(G3D, "View matrix:"); @@ -289,7 +292,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp Lighter lighter; VertexReader reader(decoded, decVtxFormat); - for (int index = indexLowerBound; index <= indexUpperBound; index++) + for (int index = 0; index < maxIndex; index++) { reader.Goto(index); @@ -483,48 +486,19 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp const TransformedVertex *drawBuffer = transformed; int numTrans = 0; - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); - if (forceIndexType != -1) { - indexType = forceIndexType; - } bool drawIndexed = false; - GLuint glIndexType = 0; if (prim != GE_PRIM_RECTANGLES) { // We can simply draw the unexpanded buffer. numTrans = vertexCount; - switch (indexType) { - case GE_VTYPE_IDX_8BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_BYTE; - break; - case GE_VTYPE_IDX_16BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_SHORT; - break; - default: - drawIndexed = false; - break; - } + drawIndexed = true; } else { numTrans = 0; drawBuffer = transformedExpanded; TransformedVertex *trans = &transformedExpanded[0]; TransformedVertex saved; for (int i = 0; i < vertexCount; i++) { - int index; - if (indexType == GE_VTYPE_IDX_8BIT) - { - index = ((u8*)inds)[i]; - } - else if (indexType == GE_VTYPE_IDX_16BIT) - { - index = ((u16*)inds)[i]; - } - else - { - index = i; - } + int index = ((u16*)inds)[i]; TransformedVertex &transVtx = transformed[index]; if ((i & 1) == 0) @@ -591,7 +565,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); + glDrawElements(glprim[prim], numTrans, GL_UNSIGNED_SHORT, (GLvoid *)inds); } else { glDrawArrays(glprim[prim], 0, numTrans); } @@ -603,6 +577,10 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { + // For the future + if (!indexGen.PrimCompatible(prim)) + Flush(prim); + int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing VertexDecoder dec; @@ -611,6 +589,51 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); + indexGen.Start(decIndex, 0, prim); + + int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); + if (forceIndexType != -1) indexType = forceIndexType; + switch (indexType) { + case GE_VTYPE_IDX_NONE: + switch (prim) { + case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break; + case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break; + case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break; + case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break; + case GE_PRIM_RECTANGLES: indexGen.AddLineList(vertexCount); break; // Same + } + break; + + case GE_VTYPE_IDX_8BIT: + switch (prim) { + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same + } + break; + + case GE_VTYPE_IDX_16BIT: + switch (prim) { + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same + } + break; + } + + indexType = GE_VTYPE_IDX_16BIT; + // From here on out, the index type is ALWAYS 16-bit. Deal with it. + // And here we should return, having collected the morphed but untransformed vertices. // Note that DecodeVerts should convert strips into indexed lists etc, adding to our // current vertex buffer and index buffer. @@ -622,15 +645,12 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte PrintDecodedVertex(decoded[i], gstate.vertType); } #endif - bool useTexCoord = false; - // Check if anything needs updating if (gstate_c.textureChanged) { if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) { PSPSetTexture(); - useTexCoord = true; } gstate_c.textureChanged = false; } @@ -647,35 +667,11 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); - bool drawIndexed; - GLuint glIndexType; - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); - if (forceIndexType != -1) { - indexType = forceIndexType; - } - int numTrans = vertexCount; - switch (indexType) { - case GE_VTYPE_IDX_8BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_BYTE; - break; - case GE_VTYPE_IDX_16BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_SHORT; - break; - default: - drawIndexed = false; - break; - } - // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); - if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); - } else { - glDrawArrays(glprim[prim], 0, numTrans); - } + glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { - SoftwareTransformAndDraw(prim, program, forceIndexType, vertexCount, inds, dec.GetDecVtxFmt(), indexLowerBound, indexUpperBound, customUV); + SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, indexType, dec.GetDecVtxFmt(), + indexGen.MaxIndex(), customUV); } } diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 5314e152f6..4c89dfcda3 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -65,4 +65,4 @@ struct Color4 } }; -void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); +// void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV); diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 8f7b7fb0b3..34bef88276 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -675,7 +675,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i *indexUpperBound = upperBound; // Decode the vertices within the found bounds, once each - decoded_ = decodedptr + lowerBound * decFmt.stride; + decoded_ = decodedptr; // + lowerBound * decFmt.stride; ptr_ = (const u8*)verts + lowerBound * size; for (int index = lowerBound; index <= upperBound; index++) { diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index 6e6bc887ea..f664d303a3 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -86,6 +86,7 @@ public: void SetVertexType(u32 vtype); const DecVtxFormat &GetDecVtxFmt() { return decFmt; } + void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; bool hasColor() const { return col != 0; } int VertexSize() const { return size; } From 3b114eb24ab0858d0f620ebc25e1244144917bff Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 19:16:17 +0100 Subject: [PATCH 3/8] More work on indexed draws. Not combining yet. --- Core/HLE/sceDisplay.cpp | 2 + GPU/GLES/DisplayListInterpreter.cpp | 2 + GPU/GLES/DisplayListInterpreter.h | 5 +- GPU/GLES/IndexGenerator.cpp | 72 +++++++++++++++++++++++++---- GPU/GLES/IndexGenerator.h | 14 +++++- GPU/GLES/TransformPipeline.cpp | 52 ++++++++++++--------- GPU/GPUState.h | 1 + 7 files changed, 115 insertions(+), 33 deletions(-) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index a417ea9cdc..be4a7cf43a 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -182,6 +182,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) sprintf(stats, "Frames: %i\n" "Draw calls: %i\n" + "Draw flushes: %i\n" "Vertices Transformed: %i\n" "Textures active: %i\n" "Vertex shaders loaded: %i\n" @@ -189,6 +190,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) "Combined shaders loaded: %i\n", gpuStats.numFrames, gpuStats.numDrawCalls, + gpuStats.numFlushes, gpuStats.numVertsTransformed, gpuStats.numTextures, gpuStats.numVertexShaders, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index d516cfe9e4..54ec9715c0 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -54,6 +54,7 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) renderHeightFactor_ = (float)renderHeight / 272.0f; shaderManager_ = &shaderManager; TextureCache_Init(); + InitTransform(); // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { ERROR_LOG(G3D, "gstate has drifted out of sync!"); @@ -113,6 +114,7 @@ void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) void GLES_GPU::CopyDisplayToOutput() { + Flush(); if (!g_Config.bBufferedRendering) return; diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 690088e98e..439157c9b9 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -22,11 +22,11 @@ #include "../GPUInterface.h" #include "Framebuffer.h" +#include "VertexDecoder.h" #include "gfx_es2/fbo.h" class ShaderManager; class LinkedShader; -struct DecVtxFormat; class GLES_GPU : public GPUInterface { @@ -52,10 +52,11 @@ public: private: // TransformPipeline.cpp + void InitTransform(); void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); //void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); void ApplyDrawState(); - void Flush(int prim); + void Flush(); void UpdateViewportAndProjection(); void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index b08ab3cd5b..d28d92d663 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -25,13 +25,14 @@ const u8 indexedPrimitiveType[7] = { GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, - GE_PRIM_TRIANGLES, + GE_PRIM_RECTANGLES, }; void IndexGenerator::Reset() { prim_ = -1; - inds_ = 0; count_ = 0; + index_ = 0; + this->inds_ = indsBase_; } bool IndexGenerator::PrimCompatible(int prim) { @@ -40,12 +41,10 @@ bool IndexGenerator::PrimCompatible(int prim) { return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; } -void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) +void IndexGenerator::Setup(u16 *inds) { - count_ = 0; - this->inds_ = inds; - index_ = baseIndex; - prim_ = indexedPrimitiveType[prim]; + this->indsBase_ = inds; + Reset(); } void IndexGenerator::AddPoints(int numVerts) @@ -58,6 +57,7 @@ void IndexGenerator::AddPoints(int numVerts) // ignore overflow verts index_ += numVerts; count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::AddList(int numVerts) @@ -74,6 +74,7 @@ void IndexGenerator::AddList(int numVerts) // ignore overflow verts index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::AddStrip(int numVerts) @@ -89,6 +90,7 @@ void IndexGenerator::AddStrip(int numVerts) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::AddFan(int numVerts) @@ -102,6 +104,7 @@ void IndexGenerator::AddFan(int numVerts) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) @@ -112,6 +115,7 @@ void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) @@ -122,6 +126,7 @@ void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) @@ -135,6 +140,7 @@ void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) @@ -150,6 +156,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) @@ -164,6 +171,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) @@ -177,6 +185,7 @@ void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) @@ -192,6 +201,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) @@ -206,6 +216,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } //Lines @@ -219,6 +230,7 @@ void IndexGenerator::AddLineList(int numVerts) } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::AddLineStrip(int numVerts) @@ -231,6 +243,20 @@ void IndexGenerator::AddLineStrip(int numVerts) } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; +} + +void IndexGenerator::AddRectangles(int numVerts) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; } void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) @@ -243,6 +269,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) @@ -255,6 +282,7 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) @@ -267,6 +295,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) @@ -279,4 +308,31 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offse } index_ += numVerts; count_ += numLines * 2; -} \ No newline at end of file + prim_ = GE_PRIM_LINES; +} + +void IndexGenerator::TranslateRectangles(int numVerts, const u8 *inds, int offset) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; +} + +void IndexGenerator::TranslateRectangles(int numVerts, const u16 *inds, int offset) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; +} diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index bb2a9bde00..af6da67ae4 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -24,9 +24,10 @@ class IndexGenerator { public: + void Setup(u16 *indexptr); void Reset(); - void Start(u16 *indexptr, int baseIndex, int prim); bool PrimCompatible(int prim); + int Prim() const { return prim_; } // Points (why index these? code simplicity) void AddPoints(int numVerts); @@ -37,15 +38,21 @@ public: // Lines void AddLineList(int numVerts); void AddLineStrip(int numVerts); + // Rectangles + void AddRectangles(int numVerts); void TranslatePoints(int numVerts, const u8 *inds, int offset); void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); - void TranslateLineStrip(int numVerts, const u8 *inds, int offset); void TranslateLineList(int numVerts, const u16 *inds, int offset); + void TranslateLineStrip(int numVerts, const u8 *inds, int offset); void TranslateLineStrip(int numVerts, const u16 *inds, int offset); + void TranslateRectangles(int numVerts, const u8 *inds, int offset); + void TranslateRectangles(int numVerts, const u16 *inds, int offset); + + void TranslateList(int numVerts, const u8 *inds, int offset); void TranslateStrip(int numVerts, const u8 *inds, int offset); void TranslateFan(int numVerts, const u8 *inds, int offset); @@ -56,7 +63,10 @@ public: int MaxIndex() { return index_; } int VertexCount() { return count_; } + bool Empty() { return index_ == 0; } + private: + u16 *indsBase_; u16 *inds_; int index_; int count_; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 913f2c23ca..88d08908cb 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -43,7 +43,10 @@ const GLuint glprim[8] = { }; u8 decoded[65536 * 32]; +VertexDecoder dec; uint16_t decIndex[65536]; // Unused +int numVerts; +int numInds; IndexGenerator indexGen; @@ -265,7 +268,7 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. -void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV) +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) { /* DEBUG_LOG(G3D, "View matrix:"); @@ -418,10 +421,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, } } - if (customUV) { - uv[0] = customUV[index * 2 + 0]*gstate_c.uScale + gstate_c.uOff; - uv[1] = customUV[index * 2 + 1]*gstate_c.vScale + gstate_c.vOff; - } else if (reader.hasUV()) { + if (reader.hasUV()) { float ruv[2]; reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. @@ -575,22 +575,27 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } +void GLES_GPU::InitTransform() { + indexGen.Setup(decIndex); +} + void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { // For the future if (!indexGen.PrimCompatible(prim)) - Flush(prim); + Flush(); + + gpuStats.numDrawCalls++; + gpuStats.numVertsTransformed += vertexCount; int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing - VertexDecoder dec; dec.SetVertexType(gstate.vertType); dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); - indexGen.Start(decIndex, 0, prim); - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); if (forceIndexType != -1) indexType = forceIndexType; switch (indexType) { @@ -602,7 +607,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break; case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break; case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break; - case GE_PRIM_RECTANGLES: indexGen.AddLineList(vertexCount); break; // Same + case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same } break; @@ -614,7 +619,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break; - case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same + case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same } break; @@ -626,12 +631,18 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break; - case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same + case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same } break; } - indexType = GE_VTYPE_IDX_16BIT; + Flush(); +} + +void GLES_GPU::Flush() +{ + if (indexGen.Empty()) + return; // From here on out, the index type is ALWAYS 16-bit. Deal with it. // And here we should return, having collected the morphed but untransformed vertices. @@ -654,12 +665,13 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } gstate_c.textureChanged = false; } - gpuStats.numDrawCalls++; - gpuStats.numVertsTransformed += vertexCount; + gpuStats.numFlushes++; // TODO: This should not be done on every drawcall, we should collect vertex data // until critical state changes. That's when we draw (flush). + int prim = indexGen.Prim(); + ApplyDrawState(); UpdateViewportAndProjection(); @@ -670,11 +682,9 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { - SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, indexType, dec.GetDecVtxFmt(), - indexGen.MaxIndex(), customUV); + SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(), + indexGen.MaxIndex()); } -} -void GLES_GPU::Flush(int prim) { - // TODO -} + indexGen.Reset(); +} \ No newline at end of file diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 66cb21d90b..e031f6bc71 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -263,6 +263,7 @@ struct GPUStatistics // Per frame statistics int numDrawCalls; + int numFlushes; int numVertsTransformed; int numTextureSwitches; int numShaderSwitches; From d663e28bdea7ac4dfdb09269f133ba4c9b635ff0 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 21:49:09 +0100 Subject: [PATCH 4/8] More work and optimization. Still not quite there. --- Core/Dialog/PSPSaveDialog.cpp | 1 + Core/HLE/sceDisplay.cpp | 7 +- Core/HLE/sceGe.cpp | 4 +- GPU/GLES/DisplayListInterpreter.cpp | 188 +++++++++++++++++++++++++++- GPU/GLES/IndexGenerator.h | 4 +- GPU/GLES/ShaderManager.cpp | 15 ++- GPU/GLES/ShaderManager.h | 1 + GPU/GLES/TextureCache.cpp | 38 +++--- GPU/GLES/TransformPipeline.cpp | 18 ++- GPU/GLES/VertexShaderGenerator.cpp | 23 +--- GPU/GPUState.h | 5 + native | 2 +- 12 files changed, 250 insertions(+), 56 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index 3185a43a0b..5a1ff4ea53 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -33,6 +33,7 @@ PSPSaveDialog::~PSPSaveDialog() { u32 PSPSaveDialog::Init(int paramAddr) { + return 0; // Ignore if already running if (status != SCE_UTILITY_STATUS_NONE && status != SCE_UTILITY_STATUS_SHUTDOWN) { diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index be4a7cf43a..dba162c325 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -185,6 +185,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) "Draw flushes: %i\n" "Vertices Transformed: %i\n" "Textures active: %i\n" + "Textures decoded: %i\n" "Vertex shaders loaded: %i\n" "Fragment shaders loaded: %i\n" "Combined shaders loaded: %i\n", @@ -193,15 +194,15 @@ void hleEnterVblank(u64 userdata, int cyclesLate) gpuStats.numFlushes, gpuStats.numVertsTransformed, gpuStats.numTextures, + gpuStats.numTexturesDecoded, gpuStats.numVertexShaders, gpuStats.numFragmentShaders, gpuStats.numShaders ); - float zoom = 0.7f / g_Config.iWindowZoom; + float zoom = 0.7f; /// g_Config.iWindowZoom; PPGeBegin(); - PPGeDrawText(stats, 2, 2, 0, zoom, 0x90000000); - PPGeDrawText(stats, 0, 0, 0, zoom); + PPGeDrawText(stats, 0, 0, 0, zoom, 0xFFc0c0c0); PPGeEnd(); gpuStats.resetFrame(); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index dd41e6e1db..3752622f7f 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -225,12 +225,12 @@ const HLEFunction sceGe_user[] = {0xE0D68148,&WrapV_UU, "sceGeListUpdateStallAddr"}, {0x03444EB4,&WrapI_UU, "sceGeListSync"}, {0xB287BD61,&WrapU_U, "sceGeDrawSync"}, - {0xB448EC0D,&WrapV_U, "sceGeBreak"}, + {0xB448EC0D,&WrapV_U, "sceGeBreak"}, {0x4C06E472,sceGeContinue, "sceGeContinue"}, {0xA4FC06A4,&WrapU_U, "sceGeSetCallback"}, {0x05DB22CE,&WrapV_U, "sceGeUnsetCallback"}, {0x1F6752AD,&WrapU_V, "sceGeEdramGetSize"}, - {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, + {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, {0xDC93CFEF,0,"sceGeGetCmd"}, {0x57C8945B,&sceGeGetMtx,"sceGeGetMtx"}, {0x438A385A,&WrapU_U,"sceGeSaveContext"}, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 54ec9715c0..4071efe200 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -43,6 +43,121 @@ ShaderManager shaderManager; extern u32 curTextureWidth; extern u32 curTextureHeight; +bool flushBeforeCommand[256] = {0}; +const bool flushBeforeCommandList[] = { + GE_CMD_BEZIER, + GE_CMD_SPLINE, + GE_CMD_SIGNAL, + GE_CMD_FINISH, + GE_CMD_END, + GE_CMD_BJUMP, + GE_CMD_VERTEXTYPE, + GE_CMD_OFFSETADDR, + GE_CMD_REGION1, + GE_CMD_REGION2, + GE_CMD_CULLFACEENABLE, + GE_CMD_TEXTUREMAPENABLE, + GE_CMD_LIGHTINGENABLE, + GE_CMD_FOGENABLE, + GE_CMD_TEXSCALEU, + GE_CMD_TEXSCALEV, + GE_CMD_TEXOFFSETU, + GE_CMD_TEXOFFSETV, + GE_CMD_SCISSOR1, + GE_CMD_SCISSOR2, + GE_CMD_MINZ, + GE_CMD_MAXZ, + GE_CMD_FRAMEBUFPTR, + GE_CMD_FRAMEBUFWIDTH, + GE_CMD_FRAMEBUFPIXFORMAT, + GE_CMD_TEXADDR0, + GE_CMD_CLUTADDR, + GE_CMD_LOADCLUT, + GE_CMD_TEXMAPMODE, + GE_CMD_TEXSHADELS, + GE_CMD_CLUTFORMAT, + GE_CMD_TRANSFERSTART, + GE_CMD_TEXSIZE0, + GE_CMD_TEXSIZE1, + GE_CMD_TEXSIZE2, + GE_CMD_TEXSIZE3, + GE_CMD_TEXSIZE4, + GE_CMD_TEXSIZE5, + GE_CMD_TEXSIZE6, + GE_CMD_TEXSIZE7, + GE_CMD_ZBUFPTR, + GE_CMD_ZBUFWIDTH, + GE_CMD_AMBIENTCOLOR, + GE_CMD_AMBIENTALPHA, + GE_CMD_MATERIALAMBIENT, + GE_CMD_MATERIALDIFFUSE, + GE_CMD_MATERIALEMISSIVE, + GE_CMD_MATERIALSPECULAR, + GE_CMD_MATERIALALPHA, + GE_CMD_MATERIALSPECULARCOEF, + GE_CMD_LIGHTTYPE0, + GE_CMD_LIGHTTYPE1, + GE_CMD_LIGHTTYPE2, + GE_CMD_LIGHTTYPE3, + GE_CMD_LX0, + GE_CMD_LX1, + GE_CMD_LX2, + GE_CMD_LX3, + GE_CMD_LDX0, + GE_CMD_LDX1, + GE_CMD_LDX2, + GE_CMD_LDX3, + GE_CMD_LKA0, + GE_CMD_LAC0, + GE_CMD_LDC0, + GE_CMD_LSC0, + GE_CMD_VIEWPORTX1, + GE_CMD_VIEWPORTY1, + GE_CMD_VIEWPORTX2, + GE_CMD_VIEWPORTY2, + GE_CMD_VIEWPORTZ1, + GE_CMD_VIEWPORTZ2, + GE_CMD_LIGHTENABLE0, + GE_CMD_LIGHTENABLE1, + GE_CMD_LIGHTENABLE2, + GE_CMD_LIGHTENABLE3, + GE_CMD_CULL, + GE_CMD_LMODE, + GE_CMD_PATCHDIVISION, + GE_CMD_MATERIALUPDATE, + GE_CMD_CLEARMODE, + GE_CMD_ALPHABLENDENABLE, + GE_CMD_BLENDMODE, + GE_CMD_BLENDFIXEDA, + GE_CMD_BLENDFIXEDB, + GE_CMD_ALPHATESTENABLE, + GE_CMD_ALPHATEST, + GE_CMD_TEXFUNC, + GE_CMD_TEXFILTER, + GE_CMD_TEXENVCOLOR, + GE_CMD_TEXMODE, + GE_CMD_TEXFORMAT, + GE_CMD_TEXFLUSH, + GE_CMD_TEXWRAP, + GE_CMD_ZTESTENABLE, + GE_CMD_STENCILTESTENABLE, + GE_CMD_ZTEST, + GE_CMD_MORPHWEIGHT0, + GE_CMD_MORPHWEIGHT1, + GE_CMD_MORPHWEIGHT2, + GE_CMD_MORPHWEIGHT3, + GE_CMD_MORPHWEIGHT4, + GE_CMD_MORPHWEIGHT5, + GE_CMD_MORPHWEIGHT6, + GE_CMD_MORPHWEIGHT7, + GE_CMD_WORLDMATRIXNUMBER, + GE_CMD_VIEWMATRIXNUMBER, + GE_CMD_PROJMATRIXNUMBER, + GE_CMD_PROJMATRIXDATA, + GE_CMD_TGENMATRIXNUMBER, + GE_CMD_BONEMATRIXNUMBER, +}; + GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) : interruptsEnabled_(true), renderWidth_(renderWidth), @@ -59,6 +174,10 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { ERROR_LOG(G3D, "gstate has drifted out of sync!"); } + + for (int i = 0; i < ARRAY_SIZE(flushBeforeCommandList); i++) { + flushBeforeCommand[flushBeforeCommandList[i]] = true; + } } GLES_GPU::~GLES_GPU() @@ -104,6 +223,7 @@ void GLES_GPU::BeginFrame() void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) { if (framebuf & 0x04000000) { + DEBUG_LOG(G3D, "Switch display framebuffer %08x", framebuf); displayFramebufPtr_ = framebuf; displayStride_ = stride; displayFormat_ = format; @@ -168,6 +288,7 @@ GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO() void GLES_GPU::SetRenderFrameBuffer() { + Flush(); if (!g_Config.bBufferedRendering) return; // Get parameters @@ -300,7 +421,7 @@ void GLES_GPU::UpdateStall(int listid, u32 newstall) void GLES_GPU::DrawSync(int mode) { - + Flush(); } void GLES_GPU::Continue() @@ -432,6 +553,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. case GE_CMD_BEZIER: { + Flush(); int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; DrawBezier(bz_ucount, bz_vcount); @@ -441,6 +563,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SPLINE: { + Flush(); int sp_ucount = data & 0xFF; int sp_vcount = (data >> 8) & 0xFF; int sp_utype = (data >> 16) & 0x3; @@ -489,6 +612,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_FINISH: + Flush(); DEBUG_LOG(G3D,"DL CMD FINISH"); // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) @@ -544,6 +668,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BJUMP: + Flush(); // bounding box jump. Let's just not jump, for now. DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); break; @@ -558,6 +683,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VERTEXTYPE: + Flush(); DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); if (diff & GE_VTYPE_THROUGH) { // Throughmode changed, let's make the proj matrix dirty. @@ -593,21 +719,25 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CULLFACEENABLE: + Flush(); DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); break; case GE_CMD_TEXTUREMAPENABLE: + Flush(); gstate_c.textureChanged = true; DEBUG_LOG(G3D, "DL Texture map enable: %i", data); break; case GE_CMD_LIGHTINGENABLE: + Flush(); DEBUG_LOG(G3D, "DL Lighting enable: %i", data); data += 1; //We don't use OpenGL lighting break; case GE_CMD_FOGENABLE: + Flush(); DEBUG_LOG(G3D, "DL Fog Enable: %i", data); break; @@ -624,24 +754,28 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXSCALEU: + Flush(); gstate_c.uScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: + Flush(); gstate_c.vScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: + Flush(); gstate_c.uOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: + Flush(); gstate_c.vOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); @@ -649,6 +783,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SCISSOR1: { + Flush(); int x1 = data & 0x3ff; int y1 = data >> 10; DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); @@ -656,6 +791,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_SCISSOR2: { + Flush(); int x2 = data & 0x3ff; int y2 = data >> 10; DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); @@ -674,6 +810,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFPTR: { + Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); } @@ -681,6 +818,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFWIDTH: { + Flush(); u32 w = data & 0xFFFFFF; DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); } @@ -690,6 +828,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: + Flush(); gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: @@ -738,10 +877,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXMAPMODE: + Flush(); DEBUG_LOG(G3D,"Tex map mode: %06x", data); break; case GE_CMD_TEXSHADELS: + Flush(); DEBUG_LOG(G3D,"Tex shade light sources: %06x", data); break; @@ -805,6 +946,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK { + Flush(); // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. DoBlockTransfer(); @@ -812,6 +954,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: + Flush(); gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); @@ -828,6 +971,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { + Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } @@ -841,44 +985,52 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_AMBIENTCOLOR: + Flush(); DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); break; case GE_CMD_AMBIENTALPHA: + Flush(); DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); break; case GE_CMD_MATERIALAMBIENT: + Flush(); DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALDIFFUSE: + Flush(); DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: + Flush(); DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATEMISSIVE); break; case GE_CMD_MATERIALSPECULAR: + Flush(); DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_MATERIALALPHA: + Flush(); DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULARCOEF: + Flush(); DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); @@ -896,6 +1048,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: { + Flush(); int n = cmd - GE_CMD_LX0; int l = n / 3; int c = n % 3; @@ -912,6 +1065,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: { + Flush(); int n = cmd - GE_CMD_LDX0; int l = n / 3; int c = n % 3; @@ -928,6 +1082,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: { + Flush(); int n = cmd - GE_CMD_LKA0; int l = n / 3; int c = n % 3; @@ -944,6 +1099,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: { + Flush(); float r = (float)(data & 0xff)/255.0f; float g = (float)((data>>8) & 0xff)/255.0f; float b = (float)(data>>16)/255.0f; @@ -962,13 +1118,16 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_VIEWPORTY1: case GE_CMD_VIEWPORTX2: case GE_CMD_VIEWPORTY2: + Flush(); DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); break; case GE_CMD_VIEWPORTZ1: + Flush(); gstate_c.zScale = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z scale: %f", gstate_c.zScale); break; case GE_CMD_VIEWPORTZ2: + Flush(); gstate_c.zOff = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z pos: %f", gstate_c.zOff); break; @@ -976,13 +1135,16 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LIGHTENABLE1: case GE_CMD_LIGHTENABLE2: case GE_CMD_LIGHTENABLE3: + Flush(); DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); break; case GE_CMD_CULL: + Flush(); DEBUG_LOG(G3D,"DL cull: %06x", data); break; case GE_CMD_LMODE: + Flush(); DEBUG_LOG(G3D,"DL Shade mode: %06x", data); break; @@ -993,6 +1155,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_MATERIALUPDATE: + Flush(); DEBUG_LOG(G3D,"DL Material Update: %d", data); break; @@ -1001,6 +1164,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // CLEARING ////////////////////////////////////////////////////////////////// case GE_CMD_CLEARMODE: + Flush(); // If it becomes a performance problem, check diff&1 if (data & 1) EnterClearMode(data); @@ -1014,33 +1178,40 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // ALPHA BLENDING ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: + Flush(); DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); break; case GE_CMD_BLENDMODE: + Flush(); DEBUG_LOG(G3D,"DL Blend mode: %06x", data); break; case GE_CMD_BLENDFIXEDA: + Flush(); DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); break; case GE_CMD_BLENDFIXEDB: + Flush(); DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); break; case GE_CMD_ALPHATESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); // This is done in the shader. break; case GE_CMD_ALPHATEST: + Flush(); DEBUG_LOG(G3D,"DL Alpha test settings"); shaderManager.DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXFUNC: { + Flush(); DEBUG_LOG(G3D,"DL TexFunc %i", data&7); /* int m=GL_MODULATE; @@ -1068,26 +1239,32 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXFILTER: { + Flush(); int min = data & 7; int mag = (data >> 8) & 1; DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag); } break; case GE_CMD_TEXENVCOLOR: + Flush(); DEBUG_LOG(G3D,"DL TexEnvColor %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXMODE: + Flush(); DEBUG_LOG(G3D,"DL TexMode %08x", data); break; case GE_CMD_TEXFORMAT: + Flush(); DEBUG_LOG(G3D,"DL TexFormat %08x", data); break; case GE_CMD_TEXFLUSH: + Flush(); DEBUG_LOG(G3D,"DL TexFlush"); break; case GE_CMD_TEXWRAP: + Flush(); DEBUG_LOG(G3D,"DL TexWrap %08x", data); break; ////////////////////////////////////////////////////////////////// @@ -1095,10 +1272,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) ////////////////////////////////////////////////////////////////// case GE_CMD_ZTESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1); break; case GE_CMD_STENCILTESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Stencil test enable: %d", data); break; @@ -1132,6 +1311,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_WORLDMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL World # %i", data & 0xF); gstate.worldmtxnum &= 0xFF00000F; break; @@ -1148,6 +1328,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VIEWMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL VIEW # %i", data & 0xF); gstate.viewmtxnum &= 0xFF00000F; break; @@ -1164,6 +1345,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_PROJMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL PROJECTION # %i", data & 0xF); gstate.projmtxnum &= 0xFF00000F; break; @@ -1179,6 +1361,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TGENMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL TGEN # %i", data & 0xF); gstate.texmtxnum &= 0xFF00000F; break; @@ -1195,6 +1378,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BONEMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL BONE #%i", data); gstate.boneMatrixNumber &= 0xFF00007F; break; @@ -1237,6 +1421,8 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; + if (flushBeforeCommand[cmd]) + Flush(); u32 diff = op ^ gstate.cmdmem[cmd]; gstate.cmdmem[cmd] = op; diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index af6da67ae4..2b491c6789 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -41,7 +41,7 @@ public: // Rectangles void AddRectangles(int numVerts); - void TranslatePoints(int numVerts, const u8 *inds, int offset); + void TranslatePoints(int numVerts, const u8 *inds, int offset); void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); @@ -65,6 +65,8 @@ public: bool Empty() { return index_ == 0; } + void SetIndex(int ind) { index_ = ind; } + private: u16 *indsBase_; u16 *inds_; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 819b2815b7..ec5b60e290 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -176,8 +176,10 @@ static void SetMatrix4x3(int uniform, const float *m4x3) { void LinkedShader::use() { glUseProgram(program); - glUniform1i(u_tex, 0); + updateUniforms(); +} +void LinkedShader::updateUniforms() { if (!dirtyUniforms) return; @@ -300,6 +302,7 @@ void ShaderManager::DirtyShader() // Forget the last shader ID lastFSID.clear(); lastVSID.clear(); + lastShader = 0; } @@ -318,8 +321,11 @@ LinkedShader *ShaderManager::ApplyShader(int prim) ComputeVertexShaderID(&VSID, prim); ComputeFragmentShaderID(&FSID); - // Bail quickly in the no-op case. TODO: why does it cause trouble? - // if (VSID == lastVSID && FSID == lastFSID) return lastShader; // Already all set. + // Just update uniforms if this is the same shader as last time. + if (lastShader != 0 && VSID == lastVSID && FSID == lastFSID) { + lastShader->updateUniforms(); + return lastShader; // Already all set. + } lastVSID = VSID; lastFSID = FSID; @@ -355,10 +361,9 @@ LinkedShader *ShaderManager::ApplyShader(int prim) linkedShaderCache[linkedID] = ls; } else { ls = iter->second; + ls->use(); } - ls->use(); - lastShader = ls; return ls; } diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index c7cf3d1040..275b479cdf 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -32,6 +32,7 @@ public: ~LinkedShader(); void use(); + void updateUniforms(); uint32_t program; u32 dirtyUniforms; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 9ad2685657..a09c0d1da3 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -597,17 +597,16 @@ void convertColors(u8 *finalBuf, GLuint dstFmt, int numPixels) void PSPSetTexture() { + static int lastBoundTexture = -1; + u32 texaddr = (gstate.texaddr[0] & 0xFFFFF0) | ((gstate.texbufwidth[0]<<8) & 0xFF000000); texaddr &= 0xFFFFFFF; - if (!texaddr) return; - u8 level = 0; u32 format = gstate.texformat & 0xF; u32 clutformat = gstate.clutformat & 3; u32 clutaddr = GetClutAddr(clutformat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); - DEBUG_LOG(G3D,"Texture at %08x",texaddr); u8 *texptr = Memory::GetPointer(texaddr); u32 texhash = texptr ? *(u32*)texptr : 0; @@ -636,8 +635,11 @@ void PSPSetTexture() if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - glBindTexture(GL_TEXTURE_2D, entry.texture); - UpdateSamplingParams(); + if (entry.texture != lastBoundTexture) { + glBindTexture(GL_TEXTURE_2D, entry.texture); + UpdateSamplingParams(); + lastBoundTexture = entry.texture; + } DEBUG_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); return; //Done! } else { @@ -653,7 +655,7 @@ void PSPSetTexture() //we have to decode it - TexCacheEntry entry; + TexCacheEntry entry = {0}; entry.addr = texaddr; entry.hash = texhash; @@ -671,9 +673,6 @@ void PSPSetTexture() entry.clutaddr = 0; } - glGenTextures(1, &entry.texture); - glBindTexture(GL_TEXTURE_2D, entry.texture); - int bufw = gstate.texbufwidth[0] & 0x3ff; entry.dim = gstate.texsize[0] & 0xF0F; @@ -681,8 +680,6 @@ void PSPSetTexture() int w = 1 << (gstate.texsize[0] & 0xf); int h = 1 << ((gstate.texsize[0]>>8) & 0xf); - INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); - gstate_c.curTextureWidth=w; gstate_c.curTextureHeight=h; GLenum dstFmt = 0; @@ -932,26 +929,27 @@ void PSPSetTexture() } } + gpuStats.numTexturesDecoded++; // Can restore these and remove the above fixup on some platforms. //glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw); - glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); //glPixelStorei(GL_PACK_ROW_LENGTH, bufw); - glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); + + glGenTextures(1, &entry.texture); + glBindTexture(GL_TEXTURE_2D, entry.texture); + lastBoundTexture = entry.texture; GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; glTexImage2D(GL_TEXTURE_2D, 0, components, w, h, 0, components, dstFmt, finalBuf); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - // glGenerateMipmap(GL_TEXTURE_2D); UpdateSamplingParams(); //glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + //glPixelStorei(GL_UNPACK_ALIGNMENT, 1); //glPixelStorei(GL_PACK_ROW_LENGTH, 0); - glPixelStorei(GL_PACK_ALIGNMENT, 1); + //glPixelStorei(GL_PACK_ALIGNMENT, 1); cache[cachekey] = entry; } diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 88d08908cb..03599d5ab3 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -44,9 +44,8 @@ const GLuint glprim[8] = { u8 decoded[65536 * 32]; VertexDecoder dec; -uint16_t decIndex[65536]; // Unused +uint16_t decIndex[65536]; int numVerts; -int numInds; IndexGenerator indexGen; @@ -577,6 +576,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void GLES_GPU::InitTransform() { indexGen.Setup(decIndex); + numVerts = 0; } void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) @@ -585,13 +585,18 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (!indexGen.PrimCompatible(prim)) Flush(); + if (!indexGen.Empty()) { + gpuStats.numJoins++; + } gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; + indexGen.SetIndex(numVerts); int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing dec.SetVertexType(gstate.vertType); - dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + dec.DecodeVerts(decoded + numVerts * (int)dec.GetDecVtxFmt().stride, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + numVerts += indexUpperBound - indexLowerBound + 1; if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); @@ -613,7 +618,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_VTYPE_IDX_8BIT: switch (prim) { - case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; @@ -677,6 +682,8 @@ void GLES_GPU::Flush() LinkedShader *program = shaderManager_->ApplyShader(prim); + DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, numVerts); + if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); @@ -687,4 +694,5 @@ void GLES_GPU::Flush() } indexGen.Reset(); -} \ No newline at end of file + numVerts = 0; +} diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 953b5c684d..0c41881839 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -88,19 +88,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim) id->d[1] |= ((gstate.ltype[i] >> 8) & 3) << (i * 4 + 2); } id->d[1] |= (gstate.materialupdate & 7) << 16; + id->d[1] |= (gstate.lightingEnable & 1) << 19; + for (int i = 0; i < 4; i++) { + id->d[1] |= (gstate.lightEnable[i] & 1) << (20 + i); + } } - - // Bits that we will need: - // lightenable * 4 - // lighttype * 4 - // lightcomp * 4 - // uv gen: - // mapping type - // texshade light choices (ONLY IF uv mapping type is shade) -} - -void WriteLight(char *p, int l) { - // TODO } const char *boneWeightAttrDecl[8] = { @@ -131,7 +123,6 @@ enum DoLightComputation { LIGHT_FULL, }; - char *GenerateVertexShader(int prim) { char *p = buffer; @@ -316,7 +307,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n"); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i); } if (doLight[i] == LIGHT_DOTONLY) @@ -400,10 +391,6 @@ char *GenerateVertexShader(int prim) WRITE(p, " v_depth = gl_Position.z;\n"); WRITE(p, "}\n"); - // DEBUG_LOG(HLE, "\n%s", buffer); -#if defined(_WIN32) && defined(_DEBUG) - OutputDebugString(buffer); -#endif return buffer; } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index e031f6bc71..4e271b4278 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -255,18 +255,23 @@ struct GPUStatistics memset(this, 0, sizeof(*this)); } void resetFrame() { + numJoins = 0; numDrawCalls = 0; numVertsTransformed = 0; numTextureSwitches = 0; numShaderSwitches = 0; + numFlushes = 0; + numTexturesDecoded = 0; } // Per frame statistics + int numJoins; int numDrawCalls; int numFlushes; int numVertsTransformed; int numTextureSwitches; int numShaderSwitches; + int numTexturesDecoded; // Total statistics, updated by the GPU core in UpdateStats int numFrames; diff --git a/native b/native index 0de5e114f3..ff60f2341b 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 0de5e114f337859a03d0763c30beaf6e03af03c4 +Subproject commit ff60f2341b31d3a8764641c9bee5b824c1090b2a From 6b3ac02dae1bffbc482200d5e8b0510b88f7ae31 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 22:52:09 +0100 Subject: [PATCH 5/8] Now not flushing at every drawcall by mistake... --- GPU/GLES/DisplayListInterpreter.cpp | 83 ++++------------------------- GPU/GLES/IndexGenerator.cpp | 2 +- GPU/GLES/TextureCache.cpp | 2 +- GPU/GLES/TransformPipeline.cpp | 2 - 4 files changed, 13 insertions(+), 76 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 4071efe200..edffc0943d 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -43,13 +43,12 @@ ShaderManager shaderManager; extern u32 curTextureWidth; extern u32 curTextureHeight; -bool flushBeforeCommand[256] = {0}; -const bool flushBeforeCommandList[] = { +bool *flushBeforeCommand = 0; +const int flushBeforeCommandList[] = { GE_CMD_BEZIER, GE_CMD_SPLINE, GE_CMD_SIGNAL, GE_CMD_FINISH, - GE_CMD_END, GE_CMD_BJUMP, GE_CMD_VERTEXTYPE, GE_CMD_OFFSETADDR, @@ -63,8 +62,6 @@ const bool flushBeforeCommandList[] = { GE_CMD_TEXSCALEV, GE_CMD_TEXOFFSETU, GE_CMD_TEXOFFSETV, - GE_CMD_SCISSOR1, - GE_CMD_SCISSOR2, GE_CMD_MINZ, GE_CMD_MAXZ, GE_CMD_FRAMEBUFPTR, @@ -175,9 +172,12 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } + flushBeforeCommand = new bool[256]; + memset(flushBeforeCommand, 0, 256 * sizeof(bool)); for (int i = 0; i < ARRAY_SIZE(flushBeforeCommandList); i++) { flushBeforeCommand[flushBeforeCommandList[i]] = true; } + flushBeforeCommand[1] = false; } GLES_GPU::~GLES_GPU() @@ -288,7 +288,6 @@ GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO() void GLES_GPU::SetRenderFrameBuffer() { - Flush(); if (!g_Config.bBufferedRendering) return; // Get parameters @@ -320,6 +319,7 @@ void GLES_GPU::SetRenderFrameBuffer() // None found? Create one. if (!vfb) { + Flush(); vfb = new VirtualFramebuffer; vfb->fb_address = fb_address; vfb->fb_stride = fb_stride; @@ -340,6 +340,7 @@ void GLES_GPU::SetRenderFrameBuffer() if (vfb != currentRenderVfb_) { + Flush(); // Use it as a render target. DEBUG_LOG(HLE, "Switching render target to FBO for %08x", vfb->fb_address); fbo_bind_as_render_target(vfb->fbo); @@ -553,7 +554,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. case GE_CMD_BEZIER: { - Flush(); int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; DrawBezier(bz_ucount, bz_vcount); @@ -563,7 +563,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SPLINE: { - Flush(); int sp_ucount = data & 0xFF; int sp_vcount = (data >> 8) & 0xFF; int sp_utype = (data >> 16) & 0x3; @@ -612,7 +611,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_FINISH: - Flush(); DEBUG_LOG(G3D,"DL CMD FINISH"); // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) @@ -668,7 +666,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BJUMP: - Flush(); // bounding box jump. Let's just not jump, for now. DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); break; @@ -683,7 +680,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VERTEXTYPE: - Flush(); DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); if (diff & GE_VTYPE_THROUGH) { // Throughmode changed, let's make the proj matrix dirty. @@ -719,25 +715,21 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CULLFACEENABLE: - Flush(); DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); break; case GE_CMD_TEXTUREMAPENABLE: - Flush(); gstate_c.textureChanged = true; DEBUG_LOG(G3D, "DL Texture map enable: %i", data); break; case GE_CMD_LIGHTINGENABLE: - Flush(); DEBUG_LOG(G3D, "DL Lighting enable: %i", data); data += 1; //We don't use OpenGL lighting break; case GE_CMD_FOGENABLE: - Flush(); DEBUG_LOG(G3D, "DL Fog Enable: %i", data); break; @@ -754,28 +746,24 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXSCALEU: - Flush(); gstate_c.uScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: - Flush(); gstate_c.vScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: - Flush(); gstate_c.uOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: - Flush(); gstate_c.vOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); @@ -783,7 +771,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SCISSOR1: { - Flush(); int x1 = data & 0x3ff; int y1 = data >> 10; DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); @@ -791,7 +778,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_SCISSOR2: { - Flush(); int x2 = data & 0x3ff; int y2 = data >> 10; DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); @@ -810,7 +796,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFPTR: { - Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); } @@ -818,7 +803,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFWIDTH: { - Flush(); u32 w = data & 0xFFFFFF; DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); } @@ -828,7 +812,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: - Flush(); gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: @@ -853,7 +836,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CLUTADDR: - //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + DEBUG_LOG(G3D,"CLUT base addr: %06x", data); break; case GE_CMD_CLUTADDRUPPER: @@ -877,12 +860,10 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXMAPMODE: - Flush(); DEBUG_LOG(G3D,"Tex map mode: %06x", data); break; case GE_CMD_TEXSHADELS: - Flush(); DEBUG_LOG(G3D,"Tex shade light sources: %06x", data); break; @@ -946,7 +927,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK { - Flush(); // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. DoBlockTransfer(); @@ -954,7 +934,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: - Flush(); gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); @@ -971,7 +950,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { - Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } @@ -985,52 +963,44 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_AMBIENTCOLOR: - Flush(); DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); break; case GE_CMD_AMBIENTALPHA: - Flush(); DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); break; case GE_CMD_MATERIALAMBIENT: - Flush(); DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALDIFFUSE: - Flush(); DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: - Flush(); DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATEMISSIVE); break; case GE_CMD_MATERIALSPECULAR: - Flush(); DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_MATERIALALPHA: - Flush(); DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULARCOEF: - Flush(); DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); @@ -1048,7 +1018,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: { - Flush(); int n = cmd - GE_CMD_LX0; int l = n / 3; int c = n % 3; @@ -1065,7 +1034,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: { - Flush(); int n = cmd - GE_CMD_LDX0; int l = n / 3; int c = n % 3; @@ -1082,7 +1050,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: { - Flush(); int n = cmd - GE_CMD_LKA0; int l = n / 3; int c = n % 3; @@ -1099,13 +1066,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: { - Flush(); float r = (float)(data & 0xff)/255.0f; float g = (float)((data>>8) & 0xff)/255.0f; float b = (float)(data>>16)/255.0f; int l = (cmd - GE_CMD_LAC0) / 3; int t = (cmd - GE_CMD_LAC0) % 3; + DEBUG_LOG(G3D,"DL Light color %i", l); gstate_c.lightColor[t][l][0] = r; gstate_c.lightColor[t][l][1] = g; gstate_c.lightColor[t][l][2] = b; @@ -1118,16 +1085,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_VIEWPORTY1: case GE_CMD_VIEWPORTX2: case GE_CMD_VIEWPORTY2: - Flush(); DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); break; case GE_CMD_VIEWPORTZ1: - Flush(); gstate_c.zScale = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z scale: %f", gstate_c.zScale); break; case GE_CMD_VIEWPORTZ2: - Flush(); gstate_c.zOff = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z pos: %f", gstate_c.zOff); break; @@ -1135,16 +1099,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LIGHTENABLE1: case GE_CMD_LIGHTENABLE2: case GE_CMD_LIGHTENABLE3: - Flush(); DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); break; case GE_CMD_CULL: - Flush(); DEBUG_LOG(G3D,"DL cull: %06x", data); break; case GE_CMD_LMODE: - Flush(); DEBUG_LOG(G3D,"DL Shade mode: %06x", data); break; @@ -1155,7 +1116,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_MATERIALUPDATE: - Flush(); DEBUG_LOG(G3D,"DL Material Update: %d", data); break; @@ -1164,7 +1124,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // CLEARING ////////////////////////////////////////////////////////////////// case GE_CMD_CLEARMODE: - Flush(); // If it becomes a performance problem, check diff&1 if (data & 1) EnterClearMode(data); @@ -1178,40 +1137,33 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // ALPHA BLENDING ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: - Flush(); DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); break; case GE_CMD_BLENDMODE: - Flush(); DEBUG_LOG(G3D,"DL Blend mode: %06x", data); break; case GE_CMD_BLENDFIXEDA: - Flush(); DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); break; case GE_CMD_BLENDFIXEDB: - Flush(); DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); break; case GE_CMD_ALPHATESTENABLE: - Flush(); DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); // This is done in the shader. break; case GE_CMD_ALPHATEST: - Flush(); DEBUG_LOG(G3D,"DL Alpha test settings"); shaderManager.DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXFUNC: { - Flush(); DEBUG_LOG(G3D,"DL TexFunc %i", data&7); /* int m=GL_MODULATE; @@ -1239,32 +1191,26 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXFILTER: { - Flush(); int min = data & 7; int mag = (data >> 8) & 1; DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag); } break; case GE_CMD_TEXENVCOLOR: - Flush(); DEBUG_LOG(G3D,"DL TexEnvColor %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXMODE: - Flush(); DEBUG_LOG(G3D,"DL TexMode %08x", data); break; case GE_CMD_TEXFORMAT: - Flush(); DEBUG_LOG(G3D,"DL TexFormat %08x", data); break; case GE_CMD_TEXFLUSH: - Flush(); DEBUG_LOG(G3D,"DL TexFlush"); break; case GE_CMD_TEXWRAP: - Flush(); DEBUG_LOG(G3D,"DL TexWrap %08x", data); break; ////////////////////////////////////////////////////////////////// @@ -1272,12 +1218,10 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) ////////////////////////////////////////////////////////////////// case GE_CMD_ZTESTENABLE: - Flush(); DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1); break; case GE_CMD_STENCILTESTENABLE: - Flush(); DEBUG_LOG(G3D,"DL Stencil test enable: %d", data); break; @@ -1311,7 +1255,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_WORLDMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL World # %i", data & 0xF); gstate.worldmtxnum &= 0xFF00000F; break; @@ -1328,7 +1271,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VIEWMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL VIEW # %i", data & 0xF); gstate.viewmtxnum &= 0xFF00000F; break; @@ -1345,7 +1287,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_PROJMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL PROJECTION # %i", data & 0xF); gstate.projmtxnum &= 0xFF00000F; break; @@ -1361,7 +1302,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TGENMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL TGEN # %i", data & 0xF); gstate.texmtxnum &= 0xFF00000F; break; @@ -1378,7 +1318,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BONEMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL BONE #%i", data); gstate.boneMatrixNumber &= 0xFF00007F; break; @@ -1421,9 +1360,9 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; - if (flushBeforeCommand[cmd]) - Flush(); u32 diff = op ^ gstate.cmdmem[cmd]; + if (diff && flushBeforeCommand[cmd]) + Flush(); gstate.cmdmem[cmd] = op; ExecuteOp(op, diff); diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index d28d92d663..5c2cdf3cb9 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -38,7 +38,7 @@ void IndexGenerator::Reset() { bool IndexGenerator::PrimCompatible(int prim) { if (prim_ == -1) return true; - return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; + return indexedPrimitiveType[prim] == prim_; } void IndexGenerator::Setup(u16 *inds) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index a09c0d1da3..08da1b008a 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -635,7 +635,7 @@ void PSPSetTexture() if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - if (entry.texture != lastBoundTexture) { + if (true || entry.texture != lastBoundTexture) { glBindTexture(GL_TEXTURE_2D, entry.texture); UpdateSamplingParams(); lastBoundTexture = entry.texture; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 03599d5ab3..bece8c27e8 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -640,8 +640,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } break; } - - Flush(); } void GLES_GPU::Flush() From e42af096c8c804bd132a9d1aa56b9df2615b52c7 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 23:43:48 +0100 Subject: [PATCH 6/8] Merge --- Core/Dialog/PSPSaveDialog.cpp | 4 ++-- Core/HLE/__sceAudio.cpp | 2 +- Core/HLE/sceAudio.cpp | 2 +- Core/HLE/sceKernel.cpp | 5 +++++ Core/HLE/sceKernelInterrupt.cpp | 5 +++-- Core/HLE/sceKernelThread.cpp | 2 +- Core/HLE/sceRtc.cpp | 14 ++++++------- GPU/GLES/DisplayListInterpreter.cpp | 32 +++++++++++++++++++++-------- GPU/GLES/DisplayListInterpreter.h | 1 + GPU/GLES/TextureCache.cpp | 20 ++++++++++++++++++ GPU/GLES/TextureCache.h | 1 + GPU/GLES/VertexShaderGenerator.cpp | 2 +- GPU/GPUInterface.h | 4 ++++ GPU/Null/NullGpu.cpp | 5 +++++ GPU/Null/NullGpu.h | 1 + 15 files changed, 77 insertions(+), 23 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index 5a1ff4ea53..a404870fc4 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -231,7 +231,7 @@ void PSPSaveDialog::DisplaySaveDataInfo1() else { char txt[1024]; - sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d %d KB\n%s\n%s" + sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d %lld KB\n%s\n%s" , param.GetFileInfo(currentSelectedSave).title , param.GetFileInfo(currentSelectedSave).modif_time.tm_mday , param.GetFileInfo(currentSelectedSave).modif_time.tm_mon + 1 @@ -255,7 +255,7 @@ void PSPSaveDialog::DisplaySaveDataInfo2() else { char txt[1024]; - sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d\n%d KB" + sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d\n%lld KB" , param.GetFileInfo(currentSelectedSave).saveTitle , param.GetFileInfo(currentSelectedSave).modif_time.tm_mday , param.GetFileInfo(currentSelectedSave).modif_time.tm_mon + 1 diff --git a/Core/HLE/__sceAudio.cpp b/Core/HLE/__sceAudio.cpp index db1cc7332a..fc474706f0 100644 --- a/Core/HLE/__sceAudio.cpp +++ b/Core/HLE/__sceAudio.cpp @@ -186,7 +186,7 @@ void __AudioUpdate() } else { // This happens quite a lot. There's still something slightly off // about the amount of audio we produce. - DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), outAudioQueue.capacity()); + DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), (u32)outAudioQueue.capacity()); } } diff --git a/Core/HLE/sceAudio.cpp b/Core/HLE/sceAudio.cpp index fcb869687c..563824421c 100644 --- a/Core/HLE/sceAudio.cpp +++ b/Core/HLE/sceAudio.cpp @@ -186,7 +186,7 @@ u32 sceAudioChReserve(u32 channel, u32 sampleCount, u32 format) //.Allocate soun { WARN_LOG(HLE, "WARNING: Reserving already reserved channel. Error?"); } - DEBUG_LOG(HLE, "%i = sceAudioChReserve(%i, %i, %i)", channel, sampleCount, format); + DEBUG_LOG(HLE, "sceAudioChReserve(channel = %d, sampleCount = %d, format = %d)", channel, sampleCount, format); chans[channel].sampleCount = sampleCount; chans[channel].reserved = true; diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index c99b4b6a2a..e9ea4db1e8 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -25,6 +25,8 @@ #include "../PSPLoaders.h" #include "../../Core/CoreTiming.h" #include "../../Core/System.h" +#include "../../GPU/GPUInterface.h" +#include "../../GPU/GPUState.h" #include "__sceAudio.h" @@ -187,6 +189,7 @@ void sceKernelGetGPI() // textures, and in the future display lists, in some cases though. void sceKernelDcacheInvalidateRange(u32 addr, int size) { + gpu->InvalidateCache(addr, size); } void sceKernelDcacheWritebackAll() { @@ -196,9 +199,11 @@ void sceKernelDcacheWritebackRange(u32 addr, int size) } void sceKernelDcacheWritebackInvalidateRange(u32 addr, int size) { + gpu->InvalidateCache(addr, size); } void sceKernelDcacheWritebackInvalidateAll() { + gpu->InvalidateCache(0, -1); } KernelObjectPool kernelObjects; diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index a8eaa41e3c..0c7b71296f 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -295,7 +295,7 @@ void __TriggerInterrupt(int type, PSPInterrupt intno, int subintr) if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUp(subintr); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, pendingInterrupts.size()); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, (u32)pendingInterrupts.size()); __TriggerRunInterrupts(type); } } @@ -305,7 +305,8 @@ void __TriggerInterruptWithArg(int type, PSPInterrupt intno, int subintr, int ar if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUpWithArg(subintr, arg); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, pendingInterrupts.size()); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, + (u32)pendingInterrupts.size()); __TriggerRunInterrupts(type); } } diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index 6523d8ba3a..446fe4a871 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -981,7 +981,7 @@ int sceKernelCreateThread(const char *threadName, u32 entry, u32 prio, int stack __KernelCreateThread(id, curModule, threadName, entry, prio, stacksize, attr); INFO_LOG(HLE, "%i = sceKernelCreateThread(name=\"%s\", entry=%08x, prio=%x, stacksize=%i)", id, threadName, entry, prio, stacksize); if (optionAddr != 0) - WARN_LOG(HLE, "sceKernelCreateThread: unsupported options parameter.", threadName); + WARN_LOG(HLE, "sceKernelCreateThread(name=\"%s\"): unsupported options parameter %08x", threadName, optionAddr); return id; } diff --git a/Core/HLE/sceRtc.cpp b/Core/HLE/sceRtc.cpp index 38447046fa..40df086d01 100644 --- a/Core/HLE/sceRtc.cpp +++ b/Core/HLE/sceRtc.cpp @@ -435,7 +435,7 @@ int sceRtcSetTime_t(u32 datePtr, u32 time) int sceRtcSetTime64_t(u32 datePtr, u64 time) { - ERROR_LOG(HLE, "HACK sceRtcSetTime64_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcSetTime64_t(%d,%lld)", datePtr, time); if (Memory::IsValidAddress(datePtr)) { ScePspDateTime pt; @@ -453,7 +453,7 @@ int sceRtcSetTime64_t(u32 datePtr, u64 time) int sceRtcGetTime_t(u32 datePtr, u32 timePtr) { - ERROR_LOG(HLE, "HACK sceRtcGetTime_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcGetTime_t(%d,%d)", datePtr, timePtr); if (Memory::IsValidAddress(datePtr)&&Memory::IsValidAddress(timePtr)) { ScePspDateTime pt; @@ -472,7 +472,7 @@ int sceRtcGetTime_t(u32 datePtr, u32 timePtr) int sceRtcGetTime64_t(u32 datePtr, u32 timePtr) { - ERROR_LOG(HLE, "HACK sceRtcGetTime64_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcGetTime64_t(%d,%d)", datePtr, timePtr); if (Memory::IsValidAddress(datePtr)&&Memory::IsValidAddress(timePtr)) { ScePspDateTime pt; @@ -568,7 +568,7 @@ int sceRtcTickAddTicks(u32 destTickPtr, u32 srcTickPtr, u64 numTicks) Memory::Write_U64(srcTick, destTickPtr); } - DEBUG_LOG(HLE, "sceRtcTickAddTicks(%d,%d,%d)", destTickPtr, srcTickPtr, numTicks); + DEBUG_LOG(HLE, "sceRtcTickAddTicks(%x,%x,%llu)", destTickPtr, srcTickPtr, numTicks); return 0; } @@ -582,7 +582,7 @@ int sceRtcTickAddMicroseconds(u32 destTickPtr,u32 srcTickPtr, u64 numMS) Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddMicroseconds(%d,%d,%d)", destTickPtr, srcTickPtr, numMS); + ERROR_LOG(HLE, "HACK sceRtcTickAddMicroseconds(%x,%x,%llu)", destTickPtr, srcTickPtr, numMS); return 0; } @@ -595,7 +595,7 @@ int sceRtcTickAddSeconds(u32 destTickPtr, u32 srcTickPtr, u64 numSecs) srcTick += numSecs * 1000000UL; Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddSeconds(%d,%d,%d)", destTickPtr, srcTickPtr, numSecs); + ERROR_LOG(HLE, "HACK sceRtcTickAddSeconds(%x,%x,%llu)", destTickPtr, srcTickPtr, numSecs); return 0; } @@ -608,7 +608,7 @@ int sceRtcTickAddMinutes(u32 destTickPtr, u32 srcTickPtr, u64 numMins) srcTick += numMins*60000000UL; Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddMinutes(%d,%d,%d)", destTickPtr, srcTickPtr, numMins); + ERROR_LOG(HLE, "HACK sceRtcTickAddMinutes(%x,%x,%llu)", destTickPtr, srcTickPtr, numMins); return 0; } diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index edffc0943d..845d380db5 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -156,11 +156,11 @@ const int flushBeforeCommandList[] = { }; GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) - : interruptsEnabled_(true), +: interruptsEnabled_(true), + displayFramebufPtr_(0), renderWidth_(renderWidth), renderHeight_(renderHeight), - dlIdGenerator(1), - displayFramebufPtr_(0) + dlIdGenerator(1) { renderWidthFactor_ = (float)renderWidth / 480.0f; renderHeightFactor_ = (float)renderHeight / 272.0f; @@ -320,6 +320,7 @@ void GLES_GPU::SetRenderFrameBuffer() // None found? Create one. if (!vfb) { Flush(); + gstate_c.textureChanged = true; vfb = new VirtualFramebuffer; vfb->fb_address = fb_address; vfb->fb_stride = fb_stride; @@ -343,6 +344,7 @@ void GLES_GPU::SetRenderFrameBuffer() Flush(); // Use it as a render target. DEBUG_LOG(HLE, "Switching render target to FBO for %08x", vfb->fb_address); + gstate_c.textureChanged = true; fbo_bind_as_render_target(vfb->fbo); glViewport(0, 0, renderWidth_, renderHeight_); currentRenderVfb_ = vfb; @@ -812,7 +814,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: - gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: case GE_CMD_TEXADDR3: @@ -820,11 +821,11 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data); break; case GE_CMD_TEXBUFWIDTH0: - gstate_c.textureChanged = true; case GE_CMD_TEXBUFWIDTH1: case GE_CMD_TEXBUFWIDTH2: case GE_CMD_TEXBUFWIDTH3: @@ -832,18 +833,22 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data); break; case GE_CMD_CLUTADDR: - DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + gstate_c.textureChanged = true; break; case GE_CMD_CLUTADDRUPPER: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF)); break; case GE_CMD_LOADCLUT: + gstate_c.textureChanged = true; // This could be used to "dirty" textures with clut. { u32 clutAddr = ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF); @@ -869,6 +874,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CLUTFORMAT: { + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Clut format: %06x", data); } break; @@ -934,7 +940,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: - gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); //fall thru - ignoring the mipmap sizes for now @@ -946,6 +951,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: DEBUG_LOG(G3D,"DL Texture Size %i: %06x", cmd - GE_CMD_TEXSIZE0, data); + gstate_c.textureChanged = true; break; case GE_CMD_ZBUFPTR: @@ -1386,7 +1392,7 @@ void GLES_GPU::DoBlockTransfer() { // TODO: This is used a lot to copy data around between render targets and textures, // and also to quickly load textures from RAM to VRAM. So we should do checks like the following: - // * Does dstBasePtr point to an existing texture? If so invalidate it and reload it immediately. + // * Does dstBasePtr point to an existing texture? If so maybe reload it immediately. // // * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so // either copy between rt and texture or reassign the texture to point to the render target @@ -1420,4 +1426,14 @@ void GLES_GPU::DoBlockTransfer() } // TODO: Notify all overlapping textures that it's time to die/reload. + + TextureCache_Invalidate(dstBasePtr + dstY * dstStride + dstX, height * dstStride + width * bpp); +} + +void GLES_GPU::InvalidateCache(u32 addr, int size) +{ + if (size > 0) + TextureCache_Invalidate(addr, size); + else + TextureCache_Clear(true); } diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 439157c9b9..425bc999a3 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -49,6 +49,7 @@ public: virtual void CopyDisplayToOutput(); virtual void BeginFrame(); virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size); private: // TransformPipeline.cpp diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 08da1b008a..ad8e708a1b 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -106,6 +106,26 @@ void TextureCache_Decimate() } } +void TextureCache_Invalidate(u32 addr, int size) +{ + u32 addr_end = addr + size; + + for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) + { + // Clear if either the addr or clutaddr is in the range. + bool invalidate = iter->second.addr >= addr && iter->second.addr < addr_end; + invalidate |= iter->second.clutaddr >= addr && iter->second.clutaddr < addr_end; + + if (invalidate) + { + glDeleteTextures(1, &iter->second.texture); + cache.erase(iter++); + } + else + ++iter; + } +} + int TextureCache_NumLoadedTextures() { return cache.size(); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 2579aa677e..c895c569c7 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -25,4 +25,5 @@ void TextureCache_Init(); void TextureCache_Shutdown(); void TextureCache_Clear(bool delete_them); void TextureCache_Decimate(); // Run this once per frame to get rid of old textures. +void TextureCache_Invalidate(u32 addr, int size); int TextureCache_NumLoadedTextures(); diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 0c41881839..8cd6cafa6a 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -307,7 +307,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i, i); } if (doLight[i] == LIGHT_DOTONLY) diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index f3fb0dad5e..be31b0c026 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -45,6 +45,10 @@ public: // Tells the GPU to update the gpuStats structure. virtual void UpdateStats() = 0; + // Invalidate any cached content sourced from the specified range. + // If size = -1, invalidate everything. + virtual void InvalidateCache(u32 addr, int size) = 0; + // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; }; diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index c327266a02..3dd5228a02 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -838,3 +838,8 @@ void NullGPU::UpdateStats() gpuStats.numShaders = 0; gpuStats.numTextures = 0; } + +void NullGPU::InvalidateCache(u32 addr, int size) +{ + // Nothing to invalidate. +} diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index 4acbf6895c..eacee19084 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -40,6 +40,7 @@ public: virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) {} virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size); private: bool ProcessDLQueue(); From 252845ecb6ba1445e77a5ddabe581dbf46adba89 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 23:54:38 +0100 Subject: [PATCH 7/8] Bugfix, remove hack --- Core/Dialog/PSPSaveDialog.cpp | 1 - Core/HLE/sceGe.cpp | 3 ++- GPU/GPUInterface.h | 2 ++ GPU/GPUState.cpp | 1 + GPU/Null/NullGpu.h | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index a404870fc4..8136491f4a 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -33,7 +33,6 @@ PSPSaveDialog::~PSPSaveDialog() { u32 PSPSaveDialog::Init(int paramAddr) { - return 0; // Ignore if already running if (status != SCE_UTILITY_STATUS_NONE && status != SCE_UTILITY_STATUS_SHUTDOWN) { diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 3752622f7f..8cbff40b80 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -166,7 +166,7 @@ void sceGeUnsetCallback(u32 cbID) { u32 sceGeSaveContext(u32 ctxAddr) { DEBUG_LOG(HLE, "sceGeSaveContext(%08x)", ctxAddr); - + gpu->Flush(); if (sizeof(gstate) > 512 * 4) { ERROR_LOG(HLE, "AARGH! sizeof(gstate) has grown too large!"); @@ -187,6 +187,7 @@ u32 sceGeSaveContext(u32 ctxAddr) u32 sceGeRestoreContext(u32 ctxAddr) { DEBUG_LOG(HLE, "sceGeRestoreContext(%08x)", ctxAddr); + gpu->Flush(); if (sizeof(gstate) > 512 * 4) { diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index be31b0c026..09773f7cf2 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -51,4 +51,6 @@ public: // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; + + virtual void Flush() = 0; }; diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index ff4e868b27..2f60deb5d6 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -78,6 +78,7 @@ void ReapplyGfxState() { if (!gpu) return; + gpu->Flush(); // ShaderManager_DirtyShader(); // The commands are embedded in the command memory so we can just reexecute the words. Convenient. // To be safe we pass 0xFFFFFFF as the diff. diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index eacee19084..93e9de9488 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -41,6 +41,7 @@ public: virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size); + virtual void Flush() {} private: bool ProcessDLQueue(); From baa640ea023d4ee8aa3de30532b4021c7310513e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 22 Dec 2012 00:24:04 +0100 Subject: [PATCH 8/8] Two more commands that get to trigger flush --- GPU/GLES/DisplayListInterpreter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 845d380db5..7540b62041 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -120,6 +120,7 @@ const int flushBeforeCommandList[] = { GE_CMD_LIGHTENABLE3, GE_CMD_CULL, GE_CMD_LMODE, + GE_CMD_REVERSENORMAL, GE_CMD_PATCHDIVISION, GE_CMD_MATERIALUPDATE, GE_CMD_CLEARMODE, @@ -138,6 +139,7 @@ const int flushBeforeCommandList[] = { GE_CMD_TEXWRAP, GE_CMD_ZTESTENABLE, GE_CMD_STENCILTESTENABLE, + GE_CMD_STENCILOP, GE_CMD_ZTEST, GE_CMD_MORPHWEIGHT0, GE_CMD_MORPHWEIGHT1,