diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index a417ea9cdc..dba162c325 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -182,24 +182,27 @@ void hleEnterVblank(u64 userdata, int cyclesLate) sprintf(stats, "Frames: %i\n" "Draw calls: %i\n" + "Draw flushes: %i\n" "Vertices Transformed: %i\n" "Textures active: %i\n" + "Textures decoded: %i\n" "Vertex shaders loaded: %i\n" "Fragment shaders loaded: %i\n" "Combined shaders loaded: %i\n", gpuStats.numFrames, gpuStats.numDrawCalls, + gpuStats.numFlushes, gpuStats.numVertsTransformed, gpuStats.numTextures, + gpuStats.numTexturesDecoded, gpuStats.numVertexShaders, gpuStats.numFragmentShaders, gpuStats.numShaders ); - float zoom = 0.7f / g_Config.iWindowZoom; + float zoom = 0.7f; /// g_Config.iWindowZoom; PPGeBegin(); - PPGeDrawText(stats, 2, 2, 0, zoom, 0x90000000); - PPGeDrawText(stats, 0, 0, 0, zoom); + PPGeDrawText(stats, 0, 0, 0, zoom, 0xFFc0c0c0); PPGeEnd(); gpuStats.resetFrame(); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index dd41e6e1db..8cbff40b80 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -166,7 +166,7 @@ void sceGeUnsetCallback(u32 cbID) { u32 sceGeSaveContext(u32 ctxAddr) { DEBUG_LOG(HLE, "sceGeSaveContext(%08x)", ctxAddr); - + gpu->Flush(); if (sizeof(gstate) > 512 * 4) { ERROR_LOG(HLE, "AARGH! sizeof(gstate) has grown too large!"); @@ -187,6 +187,7 @@ u32 sceGeSaveContext(u32 ctxAddr) u32 sceGeRestoreContext(u32 ctxAddr) { DEBUG_LOG(HLE, "sceGeRestoreContext(%08x)", ctxAddr); + gpu->Flush(); if (sizeof(gstate) > 512 * 4) { @@ -225,12 +226,12 @@ const HLEFunction sceGe_user[] = {0xE0D68148,&WrapV_UU, "sceGeListUpdateStallAddr"}, {0x03444EB4,&WrapI_UU, "sceGeListSync"}, {0xB287BD61,&WrapU_U, "sceGeDrawSync"}, - {0xB448EC0D,&WrapV_U, "sceGeBreak"}, + {0xB448EC0D,&WrapV_U, "sceGeBreak"}, {0x4C06E472,sceGeContinue, "sceGeContinue"}, {0xA4FC06A4,&WrapU_U, "sceGeSetCallback"}, {0x05DB22CE,&WrapV_U, "sceGeUnsetCallback"}, {0x1F6752AD,&WrapU_V, "sceGeEdramGetSize"}, - {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, + {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, {0xDC93CFEF,0,"sceGeGetCmd"}, {0x57C8945B,&sceGeGetMtx,"sceGeGetMtx"}, {0x438A385A,&WrapU_U,"sceGeSaveContext"}, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 36f6ec7b82..c7317112b9 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -43,6 +43,120 @@ ShaderManager shaderManager; extern u32 curTextureWidth; extern u32 curTextureHeight; +bool *flushBeforeCommand = 0; +const int flushBeforeCommandList[] = { + GE_CMD_BEZIER, + GE_CMD_SPLINE, + GE_CMD_SIGNAL, + GE_CMD_FINISH, + GE_CMD_BJUMP, + GE_CMD_VERTEXTYPE, + GE_CMD_OFFSETADDR, + GE_CMD_REGION1, + GE_CMD_REGION2, + GE_CMD_CULLFACEENABLE, + GE_CMD_TEXTUREMAPENABLE, + GE_CMD_LIGHTINGENABLE, + GE_CMD_FOGENABLE, + GE_CMD_TEXSCALEU, + GE_CMD_TEXSCALEV, + GE_CMD_TEXOFFSETU, + GE_CMD_TEXOFFSETV, + GE_CMD_MINZ, + GE_CMD_MAXZ, + GE_CMD_FRAMEBUFPTR, + GE_CMD_FRAMEBUFWIDTH, + GE_CMD_FRAMEBUFPIXFORMAT, + GE_CMD_TEXADDR0, + GE_CMD_CLUTADDR, + GE_CMD_LOADCLUT, + GE_CMD_TEXMAPMODE, + GE_CMD_TEXSHADELS, + GE_CMD_CLUTFORMAT, + GE_CMD_TRANSFERSTART, + GE_CMD_TEXSIZE0, + GE_CMD_TEXSIZE1, + GE_CMD_TEXSIZE2, + GE_CMD_TEXSIZE3, + GE_CMD_TEXSIZE4, + GE_CMD_TEXSIZE5, + GE_CMD_TEXSIZE6, + GE_CMD_TEXSIZE7, + GE_CMD_ZBUFPTR, + GE_CMD_ZBUFWIDTH, + GE_CMD_AMBIENTCOLOR, + GE_CMD_AMBIENTALPHA, + GE_CMD_MATERIALAMBIENT, + GE_CMD_MATERIALDIFFUSE, + GE_CMD_MATERIALEMISSIVE, + GE_CMD_MATERIALSPECULAR, + GE_CMD_MATERIALALPHA, + GE_CMD_MATERIALSPECULARCOEF, + GE_CMD_LIGHTTYPE0, + GE_CMD_LIGHTTYPE1, + GE_CMD_LIGHTTYPE2, + GE_CMD_LIGHTTYPE3, + GE_CMD_LX0, + GE_CMD_LX1, + GE_CMD_LX2, + GE_CMD_LX3, + GE_CMD_LDX0, + GE_CMD_LDX1, + GE_CMD_LDX2, + GE_CMD_LDX3, + GE_CMD_LKA0, + GE_CMD_LAC0, + GE_CMD_LDC0, + GE_CMD_LSC0, + GE_CMD_VIEWPORTX1, + GE_CMD_VIEWPORTY1, + GE_CMD_VIEWPORTX2, + GE_CMD_VIEWPORTY2, + GE_CMD_VIEWPORTZ1, + GE_CMD_VIEWPORTZ2, + GE_CMD_LIGHTENABLE0, + GE_CMD_LIGHTENABLE1, + GE_CMD_LIGHTENABLE2, + GE_CMD_LIGHTENABLE3, + GE_CMD_CULL, + GE_CMD_LMODE, + GE_CMD_REVERSENORMAL, + GE_CMD_PATCHDIVISION, + GE_CMD_MATERIALUPDATE, + GE_CMD_CLEARMODE, + GE_CMD_ALPHABLENDENABLE, + GE_CMD_BLENDMODE, + GE_CMD_BLENDFIXEDA, + GE_CMD_BLENDFIXEDB, + GE_CMD_ALPHATESTENABLE, + GE_CMD_ALPHATEST, + GE_CMD_TEXFUNC, + GE_CMD_TEXFILTER, + GE_CMD_TEXENVCOLOR, + GE_CMD_TEXMODE, + GE_CMD_TEXFORMAT, + GE_CMD_TEXFLUSH, + GE_CMD_TEXWRAP, + GE_CMD_ZTESTENABLE, + GE_CMD_STENCILTESTENABLE, + GE_CMD_STENCILOP, + GE_CMD_ZTEST, + GE_CMD_MORPHWEIGHT0, + GE_CMD_MORPHWEIGHT1, + GE_CMD_MORPHWEIGHT2, + GE_CMD_MORPHWEIGHT3, + GE_CMD_MORPHWEIGHT4, + GE_CMD_MORPHWEIGHT5, + GE_CMD_MORPHWEIGHT6, + GE_CMD_MORPHWEIGHT7, + GE_CMD_WORLDMATRIXNUMBER, + GE_CMD_VIEWMATRIXNUMBER, + GE_CMD_PROJMATRIXNUMBER, + GE_CMD_PROJMATRIXDATA, + GE_CMD_TGENMATRIXNUMBER, + GE_CMD_BONEMATRIXNUMBER, +}; + GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) : interruptsEnabled_(true), displayFramebufPtr_(0), @@ -54,10 +168,18 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) renderHeightFactor_ = (float)renderHeight / 272.0f; shaderManager_ = &shaderManager; TextureCache_Init(); + InitTransform(); // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { ERROR_LOG(G3D, "gstate has drifted out of sync!"); } + + flushBeforeCommand = new bool[256]; + memset(flushBeforeCommand, 0, 256 * sizeof(bool)); + for (int i = 0; i < ARRAY_SIZE(flushBeforeCommandList); i++) { + flushBeforeCommand[flushBeforeCommandList[i]] = true; + } + flushBeforeCommand[1] = false; } GLES_GPU::~GLES_GPU() @@ -103,6 +225,7 @@ void GLES_GPU::BeginFrame() void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) { if (framebuf & 0x04000000) { + DEBUG_LOG(G3D, "Switch display framebuffer %08x", framebuf); displayFramebufPtr_ = framebuf; displayStride_ = stride; displayFormat_ = format; @@ -113,6 +236,7 @@ void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) void GLES_GPU::CopyDisplayToOutput() { + Flush(); if (!g_Config.bBufferedRendering) return; @@ -197,6 +321,7 @@ void GLES_GPU::SetRenderFrameBuffer() // None found? Create one. if (!vfb) { + Flush(); gstate_c.textureChanged = true; vfb = new VirtualFramebuffer; vfb->fb_address = fb_address; @@ -218,6 +343,7 @@ void GLES_GPU::SetRenderFrameBuffer() if (vfb != currentRenderVfb_) { + Flush(); // Use it as a render target. DEBUG_LOG(HLE, "Switching render target to FBO for %08x", vfb->fb_address); gstate_c.textureChanged = true; @@ -300,7 +426,7 @@ void GLES_GPU::UpdateStall(int listid, u32 newstall) void GLES_GPU::DrawSync(int mode) { - + Flush(); } void GLES_GPU::Continue() @@ -716,6 +842,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CLUTADDR: gstate_c.textureChanged = true; //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + gstate_c.textureChanged = true; break; case GE_CMD_CLUTADDRUPPER: @@ -954,6 +1081,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) int l = (cmd - GE_CMD_LAC0) / 3; int t = (cmd - GE_CMD_LAC0) % 3; + DEBUG_LOG(G3D,"DL Light color %i", l); gstate_c.lightColor[t][l][0] = r; gstate_c.lightColor[t][l][1] = g; gstate_c.lightColor[t][l][2] = b; @@ -1242,6 +1370,8 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; u32 diff = op ^ gstate.cmdmem[cmd]; + if (diff && flushBeforeCommand[cmd]) + Flush(); gstate.cmdmem[cmd] = op; ExecuteOp(op, diff); diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 7ea8261ce1..425bc999a3 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -22,11 +22,11 @@ #include "../GPUInterface.h" #include "Framebuffer.h" +#include "VertexDecoder.h" #include "gfx_es2/fbo.h" class ShaderManager; class LinkedShader; -struct DecVtxFormat; class GLES_GPU : public GPUInterface { @@ -53,10 +53,11 @@ public: private: // TransformPipeline.cpp + void InitTransform(); void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); //void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); void ApplyDrawState(); - void Flush(int prim); + void Flush(); void UpdateViewportAndProjection(); void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index 7a786488cf..5c2cdf3cb9 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -25,24 +25,39 @@ const u8 indexedPrimitiveType[7] = { GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, - GE_PRIM_TRIANGLES, + GE_PRIM_RECTANGLES, }; void IndexGenerator::Reset() { prim_ = -1; - inds_ = 0; + count_ = 0; + index_ = 0; + this->inds_ = indsBase_; } bool IndexGenerator::PrimCompatible(int prim) { if (prim_ == -1) return true; - return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; + return indexedPrimitiveType[prim] == prim_; } -void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) +void IndexGenerator::Setup(u16 *inds) { - this->inds_ = inds; - index_ = baseIndex; + this->indsBase_ = inds; + Reset(); +} + +void IndexGenerator::AddPoints(int numVerts) +{ + //if we have no vertices return + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + i; + } + // ignore overflow verts + index_ += numVerts; + count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::AddList(int numVerts) @@ -58,6 +73,8 @@ void IndexGenerator::AddList(int numVerts) // ignore overflow verts index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::AddStrip(int numVerts) @@ -72,6 +89,8 @@ void IndexGenerator::AddStrip(int numVerts) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::AddFan(int numVerts) @@ -84,6 +103,30 @@ void IndexGenerator::AddFan(int numVerts) *inds_++ = index_ + i + 2; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; +} + +void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) +{ + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + offset + inds[i]; + } + index_ += numVerts; + count_ += numVerts; + prim_ = GE_PRIM_POINTS; +} + +void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) +{ + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + offset + inds[i]; + } + index_ += numVerts; + count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) @@ -96,6 +139,8 @@ void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + offset + inds[i*3 + 2]; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) @@ -110,6 +155,8 @@ void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) @@ -123,6 +170,8 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + offset + inds[i + 2]; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) @@ -135,6 +184,8 @@ void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) *inds_++ = index_ + offset + inds[i*3 + 2]; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) @@ -149,6 +200,8 @@ void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) @@ -162,6 +215,8 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) *inds_++ = index_ + offset + inds[i + 2]; } index_ += numVerts; + count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } //Lines @@ -174,6 +229,8 @@ void IndexGenerator::AddLineList(int numVerts) *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::AddLineStrip(int numVerts) @@ -185,6 +242,21 @@ void IndexGenerator::AddLineStrip(int numVerts) *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; + prim_ = GE_PRIM_LINES; +} + +void IndexGenerator::AddRectangles(int numVerts) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; } void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) @@ -196,6 +268,8 @@ void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) @@ -207,6 +281,8 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) @@ -218,6 +294,8 @@ void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) @@ -229,4 +307,32 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offse *inds_++ = index_ + i + 1; } index_ += numVerts; -} \ No newline at end of file + count_ += numLines * 2; + prim_ = GE_PRIM_LINES; +} + +void IndexGenerator::TranslateRectangles(int numVerts, const u8 *inds, int offset) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; +} + +void IndexGenerator::TranslateRectangles(int numVerts, const u16 *inds, int offset) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; +} diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index 45d3a0bad3..2ed1d03d5a 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -24,10 +24,13 @@ class IndexGenerator { public: + void Setup(u16 *indexptr); void Reset(); - void Start(u16 *indexptr, int baseIndex, int prim); bool PrimCompatible(int prim); + int Prim() const { return prim_; } + // Points (why index these? code simplicity) + void AddPoints(int numVerts); // Triangles void AddList(int numVerts); void AddStrip(int numVerts); @@ -35,23 +38,39 @@ public: // Lines void AddLineList(int numVerts); void AddLineStrip(int numVerts); + // Rectangles + void AddRectangles(int numVerts); + void TranslatePoints(int numVerts, const u8 *inds, int offset); + void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); - void TranslateLineStrip(int numVerts, const u8 *inds, int offset); void TranslateLineList(int numVerts, const u16 *inds, int offset); + void TranslateLineStrip(int numVerts, const u8 *inds, int offset); void TranslateLineStrip(int numVerts, const u16 *inds, int offset); + void TranslateRectangles(int numVerts, const u8 *inds, int offset); + void TranslateRectangles(int numVerts, const u16 *inds, int offset); + void TranslateList(int numVerts, const u8 *inds, int offset); void TranslateStrip(int numVerts, const u8 *inds, int offset); void TranslateFan(int numVerts, const u8 *inds, int offset); void TranslateList(int numVerts, const u16 *inds, int offset); void TranslateStrip(int numVerts, const u16 *inds, int offset); void TranslateFan(int numVerts, const u16 *inds, int offset); + + int MaxIndex() { return index_; } + int VertexCount() { return count_; } + + bool Empty() { return index_ == 0; } + + void SetIndex(int ind) { index_ = ind; } private: + u16 *indsBase_; u16 *inds_; int index_; + int count_; int prim_; }; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 819b2815b7..ec5b60e290 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -176,8 +176,10 @@ static void SetMatrix4x3(int uniform, const float *m4x3) { void LinkedShader::use() { glUseProgram(program); - glUniform1i(u_tex, 0); + updateUniforms(); +} +void LinkedShader::updateUniforms() { if (!dirtyUniforms) return; @@ -300,6 +302,7 @@ void ShaderManager::DirtyShader() // Forget the last shader ID lastFSID.clear(); lastVSID.clear(); + lastShader = 0; } @@ -318,8 +321,11 @@ LinkedShader *ShaderManager::ApplyShader(int prim) ComputeVertexShaderID(&VSID, prim); ComputeFragmentShaderID(&FSID); - // Bail quickly in the no-op case. TODO: why does it cause trouble? - // if (VSID == lastVSID && FSID == lastFSID) return lastShader; // Already all set. + // Just update uniforms if this is the same shader as last time. + if (lastShader != 0 && VSID == lastVSID && FSID == lastFSID) { + lastShader->updateUniforms(); + return lastShader; // Already all set. + } lastVSID = VSID; lastFSID = FSID; @@ -355,10 +361,9 @@ LinkedShader *ShaderManager::ApplyShader(int prim) linkedShaderCache[linkedID] = ls; } else { ls = iter->second; + ls->use(); } - ls->use(); - lastShader = ls; return ls; } diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index c7cf3d1040..275b479cdf 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -32,6 +32,7 @@ public: ~LinkedShader(); void use(); + void updateUniforms(); uint32_t program; u32 dirtyUniforms; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 05a9206f25..ad8e708a1b 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -617,17 +617,16 @@ void convertColors(u8 *finalBuf, GLuint dstFmt, int numPixels) void PSPSetTexture() { + static int lastBoundTexture = -1; + u32 texaddr = (gstate.texaddr[0] & 0xFFFFF0) | ((gstate.texbufwidth[0]<<8) & 0xFF000000); texaddr &= 0xFFFFFFF; - if (!texaddr) return; - u8 level = 0; u32 format = gstate.texformat & 0xF; u32 clutformat = gstate.clutformat & 3; u32 clutaddr = GetClutAddr(clutformat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); - DEBUG_LOG(G3D,"Texture at %08x",texaddr); u8 *texptr = Memory::GetPointer(texaddr); u32 texhash = texptr ? *(u32*)texptr : 0; @@ -656,8 +655,11 @@ void PSPSetTexture() if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - glBindTexture(GL_TEXTURE_2D, entry.texture); - UpdateSamplingParams(); + if (true || entry.texture != lastBoundTexture) { + glBindTexture(GL_TEXTURE_2D, entry.texture); + UpdateSamplingParams(); + lastBoundTexture = entry.texture; + } DEBUG_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); return; //Done! } else { @@ -673,7 +675,7 @@ void PSPSetTexture() //we have to decode it - TexCacheEntry entry; + TexCacheEntry entry = {0}; entry.addr = texaddr; entry.hash = texhash; @@ -691,9 +693,6 @@ void PSPSetTexture() entry.clutaddr = 0; } - glGenTextures(1, &entry.texture); - glBindTexture(GL_TEXTURE_2D, entry.texture); - int bufw = gstate.texbufwidth[0] & 0x3ff; entry.dim = gstate.texsize[0] & 0xF0F; @@ -701,8 +700,6 @@ void PSPSetTexture() int w = 1 << (gstate.texsize[0] & 0xf); int h = 1 << ((gstate.texsize[0]>>8) & 0xf); - INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); - gstate_c.curTextureWidth=w; gstate_c.curTextureHeight=h; GLenum dstFmt = 0; @@ -952,26 +949,27 @@ void PSPSetTexture() } } + gpuStats.numTexturesDecoded++; // Can restore these and remove the above fixup on some platforms. //glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw); - glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); //glPixelStorei(GL_PACK_ROW_LENGTH, bufw); - glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); + + glGenTextures(1, &entry.texture); + glBindTexture(GL_TEXTURE_2D, entry.texture); + lastBoundTexture = entry.texture; GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; glTexImage2D(GL_TEXTURE_2D, 0, components, w, h, 0, components, dstFmt, finalBuf); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - // glGenerateMipmap(GL_TEXTURE_2D); UpdateSamplingParams(); //glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + //glPixelStorei(GL_UNPACK_ALIGNMENT, 1); //glPixelStorei(GL_PACK_ROW_LENGTH, 0); - glPixelStorei(GL_PACK_ALIGNMENT, 1); + //glPixelStorei(GL_PACK_ALIGNMENT, 1); cache[cachekey] = entry; } diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 91a8548749..bece8c27e8 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -30,6 +30,7 @@ #include "VertexDecoder.h" #include "ShaderManager.h" #include "DisplayListInterpreter.h" +#include "IndexGenerator.h" const GLuint glprim[8] = { GL_POINTS, @@ -42,7 +43,11 @@ const GLuint glprim[8] = { }; u8 decoded[65536 * 32]; -// uint16_t decIndex[65536]; // Unused +VertexDecoder dec; +uint16_t decIndex[65536]; +int numVerts; + +IndexGenerator indexGen; TransformedVertex transformed[65536]; TransformedVertex transformedExpanded[65536]; @@ -262,7 +267,7 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. -void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV) +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) { /* DEBUG_LOG(G3D, "View matrix:"); @@ -289,7 +294,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp Lighter lighter; VertexReader reader(decoded, decVtxFormat); - for (int index = indexLowerBound; index <= indexUpperBound; index++) + for (int index = 0; index < maxIndex; index++) { reader.Goto(index); @@ -415,10 +420,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp } } - if (customUV) { - uv[0] = customUV[index * 2 + 0]*gstate_c.uScale + gstate_c.uOff; - uv[1] = customUV[index * 2 + 1]*gstate_c.vScale + gstate_c.vOff; - } else if (reader.hasUV()) { + if (reader.hasUV()) { float ruv[2]; reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. @@ -483,48 +485,19 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp const TransformedVertex *drawBuffer = transformed; int numTrans = 0; - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); - if (forceIndexType != -1) { - indexType = forceIndexType; - } bool drawIndexed = false; - GLuint glIndexType = 0; if (prim != GE_PRIM_RECTANGLES) { // We can simply draw the unexpanded buffer. numTrans = vertexCount; - switch (indexType) { - case GE_VTYPE_IDX_8BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_BYTE; - break; - case GE_VTYPE_IDX_16BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_SHORT; - break; - default: - drawIndexed = false; - break; - } + drawIndexed = true; } else { numTrans = 0; drawBuffer = transformedExpanded; TransformedVertex *trans = &transformedExpanded[0]; TransformedVertex saved; for (int i = 0; i < vertexCount; i++) { - int index; - if (indexType == GE_VTYPE_IDX_8BIT) - { - index = ((u8*)inds)[i]; - } - else if (indexType == GE_VTYPE_IDX_16BIT) - { - index = ((u16*)inds)[i]; - } - else - { - index = i; - } + int index = ((u16*)inds)[i]; TransformedVertex &transVtx = transformed[index]; if ((i & 1) == 0) @@ -591,7 +564,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); + glDrawElements(glprim[prim], numTrans, GL_UNSIGNED_SHORT, (GLvoid *)inds); } else { glDrawArrays(glprim[prim], 0, numTrans); } @@ -601,16 +574,80 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } +void GLES_GPU::InitTransform() { + indexGen.Setup(decIndex); + numVerts = 0; +} + void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { + // For the future + if (!indexGen.PrimCompatible(prim)) + Flush(); + + if (!indexGen.Empty()) { + gpuStats.numJoins++; + } + gpuStats.numDrawCalls++; + gpuStats.numVertsTransformed += vertexCount; + + indexGen.SetIndex(numVerts); int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing - VertexDecoder dec; dec.SetVertexType(gstate.vertType); - dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + dec.DecodeVerts(decoded + numVerts * (int)dec.GetDecVtxFmt().stride, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + numVerts += indexUpperBound - indexLowerBound + 1; + if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); + int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); + if (forceIndexType != -1) indexType = forceIndexType; + switch (indexType) { + case GE_VTYPE_IDX_NONE: + switch (prim) { + case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break; + case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break; + case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break; + case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break; + case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same + } + break; + + case GE_VTYPE_IDX_8BIT: + switch (prim) { + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same + } + break; + + case GE_VTYPE_IDX_16BIT: + switch (prim) { + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same + } + break; + } +} + +void GLES_GPU::Flush() +{ + if (indexGen.Empty()) + return; + // From here on out, the index type is ALWAYS 16-bit. Deal with it. + // And here we should return, having collected the morphed but untransformed vertices. // Note that DecodeVerts should convert strips into indexed lists etc, adding to our // current vertex buffer and index buffer. @@ -622,63 +659,38 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte PrintDecodedVertex(decoded[i], gstate.vertType); } #endif - bool useTexCoord = false; - // Check if anything needs updating if (gstate_c.textureChanged) { if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) { PSPSetTexture(); - useTexCoord = true; } gstate_c.textureChanged = false; } - gpuStats.numDrawCalls++; - gpuStats.numVertsTransformed += vertexCount; + gpuStats.numFlushes++; // TODO: This should not be done on every drawcall, we should collect vertex data // until critical state changes. That's when we draw (flush). + int prim = indexGen.Prim(); + ApplyDrawState(); UpdateViewportAndProjection(); LinkedShader *program = shaderManager_->ApplyShader(prim); + DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, numVerts); + if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); - bool drawIndexed; - GLuint glIndexType; - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); - if (forceIndexType != -1) { - indexType = forceIndexType; - } - int numTrans = vertexCount; - switch (indexType) { - case GE_VTYPE_IDX_8BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_BYTE; - break; - case GE_VTYPE_IDX_16BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_SHORT; - break; - default: - drawIndexed = false; - break; - } - // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); - if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); - } else { - glDrawArrays(glprim[prim], 0, numTrans); - } + glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { - SoftwareTransformAndDraw(prim, program, forceIndexType, vertexCount, inds, dec.GetDecVtxFmt(), indexLowerBound, indexUpperBound, customUV); + SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(), + indexGen.MaxIndex()); } -} -void GLES_GPU::Flush(int prim) { - // TODO + indexGen.Reset(); + numVerts = 0; } diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 5314e152f6..4c89dfcda3 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -65,4 +65,4 @@ struct Color4 } }; -void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); +// void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV); diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 8f7b7fb0b3..34bef88276 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -675,7 +675,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i *indexUpperBound = upperBound; // Decode the vertices within the found bounds, once each - decoded_ = decodedptr + lowerBound * decFmt.stride; + decoded_ = decodedptr; // + lowerBound * decFmt.stride; ptr_ = (const u8*)verts + lowerBound * size; for (int index = lowerBound; index <= upperBound; index++) { diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index 6e6bc887ea..f664d303a3 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -86,6 +86,7 @@ public: void SetVertexType(u32 vtype); const DecVtxFormat &GetDecVtxFmt() { return decFmt; } + void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; bool hasColor() const { return col != 0; } int VertexSize() const { return size; } diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index a819ddfef1..8cd6cafa6a 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -88,19 +88,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim) id->d[1] |= ((gstate.ltype[i] >> 8) & 3) << (i * 4 + 2); } id->d[1] |= (gstate.materialupdate & 7) << 16; + id->d[1] |= (gstate.lightingEnable & 1) << 19; + for (int i = 0; i < 4; i++) { + id->d[1] |= (gstate.lightEnable[i] & 1) << (20 + i); + } } - - // Bits that we will need: - // lightenable * 4 - // lighttype * 4 - // lightcomp * 4 - // uv gen: - // mapping type - // texshade light choices (ONLY IF uv mapping type is shade) -} - -void WriteLight(char *p, int l) { - // TODO } const char *boneWeightAttrDecl[8] = { @@ -131,7 +123,6 @@ enum DoLightComputation { LIGHT_FULL, }; - char *GenerateVertexShader(int prim) { char *p = buffer; @@ -400,10 +391,6 @@ char *GenerateVertexShader(int prim) WRITE(p, " v_depth = gl_Position.z;\n"); WRITE(p, "}\n"); - // DEBUG_LOG(HLE, "\n%s", buffer); -#if defined(_WIN32) && defined(_DEBUG) - OutputDebugString(buffer); -#endif return buffer; } diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index be31b0c026..09773f7cf2 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -51,4 +51,6 @@ public: // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; + + virtual void Flush() = 0; }; diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index ff4e868b27..2f60deb5d6 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -78,6 +78,7 @@ void ReapplyGfxState() { if (!gpu) return; + gpu->Flush(); // ShaderManager_DirtyShader(); // The commands are embedded in the command memory so we can just reexecute the words. Convenient. // To be safe we pass 0xFFFFFFF as the diff. diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 66cb21d90b..4e271b4278 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -255,17 +255,23 @@ struct GPUStatistics memset(this, 0, sizeof(*this)); } void resetFrame() { + numJoins = 0; numDrawCalls = 0; numVertsTransformed = 0; numTextureSwitches = 0; numShaderSwitches = 0; + numFlushes = 0; + numTexturesDecoded = 0; } // Per frame statistics + int numJoins; int numDrawCalls; + int numFlushes; int numVertsTransformed; int numTextureSwitches; int numShaderSwitches; + int numTexturesDecoded; // Total statistics, updated by the GPU core in UpdateStats int numFrames; diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index eacee19084..93e9de9488 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -41,6 +41,7 @@ public: virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size); + virtual void Flush() {} private: bool ProcessDLQueue();