diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index 3185a43a0b..5a1ff4ea53 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -33,6 +33,7 @@ PSPSaveDialog::~PSPSaveDialog() { u32 PSPSaveDialog::Init(int paramAddr) { + return 0; /* NOTE(review): unconditional early return makes the rest of Init() unreachable - looks like a temporary stub; confirm it is intended */ // Ignore if already running if (status != SCE_UTILITY_STATUS_NONE && status != SCE_UTILITY_STATUS_SHUTDOWN) { diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index be4a7cf43a..dba162c325 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -185,6 +185,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) "Draw flushes: %i\n" "Vertices Transformed: %i\n" "Textures active: %i\n" + "Textures decoded: %i\n" "Vertex shaders loaded: %i\n" "Fragment shaders loaded: %i\n" "Combined shaders loaded: %i\n", @@ -193,15 +194,15 @@ void hleEnterVblank(u64 userdata, int cyclesLate) gpuStats.numFlushes, gpuStats.numVertsTransformed, gpuStats.numTextures, + gpuStats.numTexturesDecoded, gpuStats.numVertexShaders, gpuStats.numFragmentShaders, gpuStats.numShaders ); - float zoom = 0.7f / g_Config.iWindowZoom; + float zoom = 0.7f; /// g_Config.iWindowZoom; PPGeBegin(); - PPGeDrawText(stats, 2, 2, 0, zoom, 0x90000000); - PPGeDrawText(stats, 0, 0, 0, zoom); + PPGeDrawText(stats, 0, 0, 0, zoom, 0xFFc0c0c0); PPGeEnd(); gpuStats.resetFrame(); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index dd41e6e1db..3752622f7f 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -225,12 +225,12 @@ const HLEFunction sceGe_user[] = {0xE0D68148,&WrapV_UU, "sceGeListUpdateStallAddr"}, {0x03444EB4,&WrapI_UU, "sceGeListSync"}, {0xB287BD61,&WrapU_U, "sceGeDrawSync"}, - {0xB448EC0D,&WrapV_U, "sceGeBreak"}, + {0xB448EC0D,&WrapV_U, "sceGeBreak"}, {0x4C06E472,sceGeContinue, "sceGeContinue"}, {0xA4FC06A4,&WrapU_U, "sceGeSetCallback"}, {0x05DB22CE,&WrapV_U, "sceGeUnsetCallback"}, {0x1F6752AD,&WrapU_V, "sceGeEdramGetSize"}, - {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, + 
{0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, {0xDC93CFEF,0,"sceGeGetCmd"}, {0x57C8945B,&sceGeGetMtx,"sceGeGetMtx"}, {0x438A385A,&WrapU_U,"sceGeSaveContext"}, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 54ec9715c0..4071efe200 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -43,6 +43,121 @@ ShaderManager shaderManager; extern u32 curTextureWidth; extern u32 curTextureHeight; +bool flushBeforeCommand[256] = {0}; /* lookup indexed by GE command byte: true = Flush() pending draws before executing it */ +const int flushBeforeCommandList[] = { /* GE command IDs; must be an integer type - as bool every non-zero ID collapsed to 1, so only flushBeforeCommand[0..1] was ever set */ + GE_CMD_BEZIER, + GE_CMD_SPLINE, + GE_CMD_SIGNAL, + GE_CMD_FINISH, + GE_CMD_END, + GE_CMD_BJUMP, + GE_CMD_VERTEXTYPE, + GE_CMD_OFFSETADDR, + GE_CMD_REGION1, + GE_CMD_REGION2, + GE_CMD_CULLFACEENABLE, + GE_CMD_TEXTUREMAPENABLE, + GE_CMD_LIGHTINGENABLE, + GE_CMD_FOGENABLE, + GE_CMD_TEXSCALEU, + GE_CMD_TEXSCALEV, + GE_CMD_TEXOFFSETU, + GE_CMD_TEXOFFSETV, + GE_CMD_SCISSOR1, + GE_CMD_SCISSOR2, + GE_CMD_MINZ, + GE_CMD_MAXZ, + GE_CMD_FRAMEBUFPTR, + GE_CMD_FRAMEBUFWIDTH, + GE_CMD_FRAMEBUFPIXFORMAT, + GE_CMD_TEXADDR0, + GE_CMD_CLUTADDR, + GE_CMD_LOADCLUT, + GE_CMD_TEXMAPMODE, + GE_CMD_TEXSHADELS, + GE_CMD_CLUTFORMAT, + GE_CMD_TRANSFERSTART, + GE_CMD_TEXSIZE0, + GE_CMD_TEXSIZE1, + GE_CMD_TEXSIZE2, + GE_CMD_TEXSIZE3, + GE_CMD_TEXSIZE4, + GE_CMD_TEXSIZE5, + GE_CMD_TEXSIZE6, + GE_CMD_TEXSIZE7, + GE_CMD_ZBUFPTR, + GE_CMD_ZBUFWIDTH, + GE_CMD_AMBIENTCOLOR, + GE_CMD_AMBIENTALPHA, + GE_CMD_MATERIALAMBIENT, + GE_CMD_MATERIALDIFFUSE, + GE_CMD_MATERIALEMISSIVE, + GE_CMD_MATERIALSPECULAR, + GE_CMD_MATERIALALPHA, + GE_CMD_MATERIALSPECULARCOEF, + GE_CMD_LIGHTTYPE0, + GE_CMD_LIGHTTYPE1, + GE_CMD_LIGHTTYPE2, + GE_CMD_LIGHTTYPE3, + GE_CMD_LX0, + GE_CMD_LX1, + GE_CMD_LX2, + GE_CMD_LX3, + GE_CMD_LDX0, + GE_CMD_LDX1, + GE_CMD_LDX2, + GE_CMD_LDX3, + GE_CMD_LKA0, + GE_CMD_LAC0, + GE_CMD_LDC0, + GE_CMD_LSC0, + GE_CMD_VIEWPORTX1, + GE_CMD_VIEWPORTY1, + GE_CMD_VIEWPORTX2, + GE_CMD_VIEWPORTY2, + GE_CMD_VIEWPORTZ1, + GE_CMD_VIEWPORTZ2, + 
GE_CMD_LIGHTENABLE0, + GE_CMD_LIGHTENABLE1, + GE_CMD_LIGHTENABLE2, + GE_CMD_LIGHTENABLE3, + GE_CMD_CULL, + GE_CMD_LMODE, + GE_CMD_PATCHDIVISION, + GE_CMD_MATERIALUPDATE, + GE_CMD_CLEARMODE, + GE_CMD_ALPHABLENDENABLE, + GE_CMD_BLENDMODE, + GE_CMD_BLENDFIXEDA, + GE_CMD_BLENDFIXEDB, + GE_CMD_ALPHATESTENABLE, + GE_CMD_ALPHATEST, + GE_CMD_TEXFUNC, + GE_CMD_TEXFILTER, + GE_CMD_TEXENVCOLOR, + GE_CMD_TEXMODE, + GE_CMD_TEXFORMAT, + GE_CMD_TEXFLUSH, + GE_CMD_TEXWRAP, + GE_CMD_ZTESTENABLE, + GE_CMD_STENCILTESTENABLE, + GE_CMD_ZTEST, + GE_CMD_MORPHWEIGHT0, + GE_CMD_MORPHWEIGHT1, + GE_CMD_MORPHWEIGHT2, + GE_CMD_MORPHWEIGHT3, + GE_CMD_MORPHWEIGHT4, + GE_CMD_MORPHWEIGHT5, + GE_CMD_MORPHWEIGHT6, + GE_CMD_MORPHWEIGHT7, + GE_CMD_WORLDMATRIXNUMBER, + GE_CMD_VIEWMATRIXNUMBER, + GE_CMD_PROJMATRIXNUMBER, + GE_CMD_PROJMATRIXDATA, + GE_CMD_TGENMATRIXNUMBER, + GE_CMD_BONEMATRIXNUMBER, +}; + GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) : interruptsEnabled_(true), renderWidth_(renderWidth), @@ -59,6 +174,10 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { ERROR_LOG(G3D, "gstate has drifted out of sync!"); } + + for (int i = 0; i < ARRAY_SIZE(flushBeforeCommandList); i++) { + flushBeforeCommand[flushBeforeCommandList[i]] = true; + } } GLES_GPU::~GLES_GPU() @@ -104,6 +223,7 @@ void GLES_GPU::BeginFrame() void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) { if (framebuf & 0x04000000) { + DEBUG_LOG(G3D, "Switch display framebuffer %08x", framebuf); displayFramebufPtr_ = framebuf; displayStride_ = stride; displayFormat_ = format; @@ -168,6 +288,7 @@ GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO() void GLES_GPU::SetRenderFrameBuffer() { + Flush(); if (!g_Config.bBufferedRendering) return; // Get parameters @@ -300,7 +421,7 @@ void GLES_GPU::UpdateStall(int listid, u32 newstall) void GLES_GPU::DrawSync(int mode) { - + Flush(); } void GLES_GPU::Continue() @@ -432,6 +553,7 @@ void 
GLES_GPU::ExecuteOp(u32 op, u32 diff) // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. case GE_CMD_BEZIER: { + Flush(); int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; DrawBezier(bz_ucount, bz_vcount); @@ -441,6 +563,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SPLINE: { + Flush(); int sp_ucount = data & 0xFF; int sp_vcount = (data >> 8) & 0xFF; int sp_utype = (data >> 16) & 0x3; @@ -489,6 +612,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_FINISH: + Flush(); DEBUG_LOG(G3D,"DL CMD FINISH"); // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) @@ -544,6 +668,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BJUMP: + Flush(); // bounding box jump. Let's just not jump, for now. DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); break; @@ -558,6 +683,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VERTEXTYPE: + Flush(); DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); if (diff & GE_VTYPE_THROUGH) { // Throughmode changed, let's make the proj matrix dirty. 
@@ -593,21 +719,25 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CULLFACEENABLE: + Flush(); DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); break; case GE_CMD_TEXTUREMAPENABLE: + Flush(); gstate_c.textureChanged = true; DEBUG_LOG(G3D, "DL Texture map enable: %i", data); break; case GE_CMD_LIGHTINGENABLE: + Flush(); DEBUG_LOG(G3D, "DL Lighting enable: %i", data); data += 1; //We don't use OpenGL lighting break; case GE_CMD_FOGENABLE: + Flush(); DEBUG_LOG(G3D, "DL Fog Enable: %i", data); break; @@ -624,24 +754,28 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXSCALEU: + Flush(); gstate_c.uScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: + Flush(); gstate_c.vScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: + Flush(); gstate_c.uOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: + Flush(); gstate_c.vOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); @@ -649,6 +783,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SCISSOR1: { + Flush(); int x1 = data & 0x3ff; int y1 = data >> 10; DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); @@ -656,6 +791,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_SCISSOR2: { + Flush(); int x2 = data & 0x3ff; int y2 = data >> 10; DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); @@ -674,6 +810,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFPTR: { + Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); } @@ -681,6 +818,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFWIDTH: { + Flush(); u32 w 
= data & 0xFFFFFF; DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); } @@ -690,6 +828,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: + Flush(); gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: @@ -738,10 +877,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXMAPMODE: + Flush(); DEBUG_LOG(G3D,"Tex map mode: %06x", data); break; case GE_CMD_TEXSHADELS: + Flush(); DEBUG_LOG(G3D,"Tex shade light sources: %06x", data); break; @@ -805,6 +946,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK { + Flush(); // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. DoBlockTransfer(); @@ -812,6 +954,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: + Flush(); gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); @@ -828,6 +971,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { + Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } @@ -841,44 +985,52 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_AMBIENTCOLOR: + Flush(); DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); break; case GE_CMD_AMBIENTALPHA: + Flush(); DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); break; case GE_CMD_MATERIALAMBIENT: + Flush(); DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALDIFFUSE: + Flush(); DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: + Flush(); DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATEMISSIVE); break; case 
GE_CMD_MATERIALSPECULAR: + Flush(); DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_MATERIALALPHA: + Flush(); DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULARCOEF: + Flush(); DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); @@ -896,6 +1048,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: { + Flush(); int n = cmd - GE_CMD_LX0; int l = n / 3; int c = n % 3; @@ -912,6 +1065,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: { + Flush(); int n = cmd - GE_CMD_LDX0; int l = n / 3; int c = n % 3; @@ -928,6 +1082,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: { + Flush(); int n = cmd - GE_CMD_LKA0; int l = n / 3; int c = n % 3; @@ -944,6 +1099,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: { + Flush(); float r = (float)(data & 0xff)/255.0f; float g = (float)((data>>8) & 0xff)/255.0f; float b = (float)(data>>16)/255.0f; @@ -962,13 +1118,16 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_VIEWPORTY1: case GE_CMD_VIEWPORTX2: case GE_CMD_VIEWPORTY2: + Flush(); DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); break; case GE_CMD_VIEWPORTZ1: + Flush(); gstate_c.zScale = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z scale: %f", gstate_c.zScale); break; case GE_CMD_VIEWPORTZ2: + Flush(); gstate_c.zOff = getFloat24(data) / 65535.f; 
DEBUG_LOG(G3D,"DL Z pos: %f", gstate_c.zOff); break; @@ -976,13 +1135,16 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LIGHTENABLE1: case GE_CMD_LIGHTENABLE2: case GE_CMD_LIGHTENABLE3: + Flush(); DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); break; case GE_CMD_CULL: + Flush(); DEBUG_LOG(G3D,"DL cull: %06x", data); break; case GE_CMD_LMODE: + Flush(); DEBUG_LOG(G3D,"DL Shade mode: %06x", data); break; @@ -993,6 +1155,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_MATERIALUPDATE: + Flush(); DEBUG_LOG(G3D,"DL Material Update: %d", data); break; @@ -1001,6 +1164,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // CLEARING ////////////////////////////////////////////////////////////////// case GE_CMD_CLEARMODE: + Flush(); // If it becomes a performance problem, check diff&1 if (data & 1) EnterClearMode(data); @@ -1014,33 +1178,40 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // ALPHA BLENDING ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: + Flush(); DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); break; case GE_CMD_BLENDMODE: + Flush(); DEBUG_LOG(G3D,"DL Blend mode: %06x", data); break; case GE_CMD_BLENDFIXEDA: + Flush(); DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); break; case GE_CMD_BLENDFIXEDB: + Flush(); DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); break; case GE_CMD_ALPHATESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); // This is done in the shader. 
break; case GE_CMD_ALPHATEST: + Flush(); DEBUG_LOG(G3D,"DL Alpha test settings"); shaderManager.DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXFUNC: { + Flush(); DEBUG_LOG(G3D,"DL TexFunc %i", data&7); /* int m=GL_MODULATE; @@ -1068,26 +1239,32 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXFILTER: { + Flush(); int min = data & 7; int mag = (data >> 8) & 1; DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag); } break; case GE_CMD_TEXENVCOLOR: + Flush(); DEBUG_LOG(G3D,"DL TexEnvColor %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXMODE: + Flush(); DEBUG_LOG(G3D,"DL TexMode %08x", data); break; case GE_CMD_TEXFORMAT: + Flush(); DEBUG_LOG(G3D,"DL TexFormat %08x", data); break; case GE_CMD_TEXFLUSH: + Flush(); DEBUG_LOG(G3D,"DL TexFlush"); break; case GE_CMD_TEXWRAP: + Flush(); DEBUG_LOG(G3D,"DL TexWrap %08x", data); break; ////////////////////////////////////////////////////////////////// @@ -1095,10 +1272,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) ////////////////////////////////////////////////////////////////// case GE_CMD_ZTESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1); break; case GE_CMD_STENCILTESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Stencil test enable: %d", data); break; @@ -1132,6 +1311,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_WORLDMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL World # %i", data & 0xF); gstate.worldmtxnum &= 0xFF00000F; break; @@ -1148,6 +1328,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VIEWMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL VIEW # %i", data & 0xF); gstate.viewmtxnum &= 0xFF00000F; break; @@ -1164,6 +1345,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_PROJMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL PROJECTION # %i", data & 0xF); gstate.projmtxnum &= 0xFF00000F; break; @@ -1179,6 +1361,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TGENMATRIXNUMBER: + 
Flush(); DEBUG_LOG(G3D,"DL TGEN # %i", data & 0xF); gstate.texmtxnum &= 0xFF00000F; break; @@ -1195,6 +1378,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BONEMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL BONE #%i", data); gstate.boneMatrixNumber &= 0xFF00007F; break; @@ -1237,6 +1421,8 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; + if (flushBeforeCommand[cmd]) + Flush(); u32 diff = op ^ gstate.cmdmem[cmd]; gstate.cmdmem[cmd] = op; diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index af6da67ae4..2b491c6789 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -41,7 +41,7 @@ public: // Rectangles void AddRectangles(int numVerts); - void TranslatePoints(int numVerts, const u8 *inds, int offset); + void TranslatePoints(int numVerts, const u8 *inds, int offset); void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); @@ -65,6 +65,8 @@ public: bool Empty() { return index_ == 0; } + void SetIndex(int ind) { index_ = ind; } + private: u16 *indsBase_; u16 *inds_; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 819b2815b7..ec5b60e290 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -176,8 +176,10 @@ static void SetMatrix4x3(int uniform, const float *m4x3) { void LinkedShader::use() { glUseProgram(program); - glUniform1i(u_tex, 0); + updateUniforms(); +} +void LinkedShader::updateUniforms() { if (!dirtyUniforms) return; @@ -300,6 +302,7 @@ void ShaderManager::DirtyShader() // Forget the last shader ID lastFSID.clear(); lastVSID.clear(); + lastShader = 0; } @@ -318,8 +321,11 @@ LinkedShader *ShaderManager::ApplyShader(int prim) ComputeVertexShaderID(&VSID, prim); ComputeFragmentShaderID(&FSID); - // Bail quickly in the no-op case. TODO: why does it cause trouble? 
- // if (VSID == lastVSID && FSID == lastFSID) return lastShader; // Already all set. + // Just update uniforms if this is the same shader as last time. + if (lastShader != 0 && VSID == lastVSID && FSID == lastFSID) { + lastShader->updateUniforms(); + return lastShader; // Already all set. + } lastVSID = VSID; lastFSID = FSID; @@ -355,10 +361,9 @@ LinkedShader *ShaderManager::ApplyShader(int prim) linkedShaderCache[linkedID] = ls; } else { ls = iter->second; + ls->use(); } - ls->use(); - lastShader = ls; return ls; } diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index c7cf3d1040..275b479cdf 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -32,6 +32,7 @@ public: ~LinkedShader(); void use(); + void updateUniforms(); uint32_t program; u32 dirtyUniforms; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 9ad2685657..a09c0d1da3 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -597,17 +597,16 @@ void convertColors(u8 *finalBuf, GLuint dstFmt, int numPixels) void PSPSetTexture() { + static int lastBoundTexture = -1; + u32 texaddr = (gstate.texaddr[0] & 0xFFFFF0) | ((gstate.texbufwidth[0]<<8) & 0xFF000000); texaddr &= 0xFFFFFFF; - if (!texaddr) return; - u8 level = 0; u32 format = gstate.texformat & 0xF; u32 clutformat = gstate.clutformat & 3; u32 clutaddr = GetClutAddr(clutformat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); - DEBUG_LOG(G3D,"Texture at %08x",texaddr); u8 *texptr = Memory::GetPointer(texaddr); u32 texhash = texptr ? *(u32*)texptr : 0; @@ -636,8 +635,11 @@ void PSPSetTexture() if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - glBindTexture(GL_TEXTURE_2D, entry.texture); - UpdateSamplingParams(); + if (entry.texture != lastBoundTexture) { + glBindTexture(GL_TEXTURE_2D, entry.texture); + UpdateSamplingParams(); + lastBoundTexture = entry.texture; + } DEBUG_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); return; //Done! 
} else { @@ -653,7 +655,7 @@ void PSPSetTexture() //we have to decode it - TexCacheEntry entry; + TexCacheEntry entry = {0}; entry.addr = texaddr; entry.hash = texhash; @@ -671,9 +673,6 @@ void PSPSetTexture() entry.clutaddr = 0; } - glGenTextures(1, &entry.texture); - glBindTexture(GL_TEXTURE_2D, entry.texture); - int bufw = gstate.texbufwidth[0] & 0x3ff; entry.dim = gstate.texsize[0] & 0xF0F; @@ -681,8 +680,6 @@ void PSPSetTexture() int w = 1 << (gstate.texsize[0] & 0xf); int h = 1 << ((gstate.texsize[0]>>8) & 0xf); - INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); - gstate_c.curTextureWidth=w; gstate_c.curTextureHeight=h; GLenum dstFmt = 0; @@ -932,26 +929,27 @@ void PSPSetTexture() } } + gpuStats.numTexturesDecoded++; // Can restore these and remove the above fixup on some platforms. //glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw); - glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); //glPixelStorei(GL_PACK_ROW_LENGTH, bufw); - glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + glGenTextures(1, &entry.texture); /* generate the GL name first so the log below reports it instead of the zero-initialised placeholder */ + INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); + + glBindTexture(GL_TEXTURE_2D, entry.texture); + lastBoundTexture = entry.texture; /* keep the bind cache in sync with the texture just bound */ GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? 
GL_RGB : GL_RGBA; glTexImage2D(GL_TEXTURE_2D, 0, components, w, h, 0, components, dstFmt, finalBuf); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - // glGenerateMipmap(GL_TEXTURE_2D); UpdateSamplingParams(); //glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + //glPixelStorei(GL_UNPACK_ALIGNMENT, 1); //glPixelStorei(GL_PACK_ROW_LENGTH, 0); - glPixelStorei(GL_PACK_ALIGNMENT, 1); + //glPixelStorei(GL_PACK_ALIGNMENT, 1); cache[cachekey] = entry; } diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 88d08908cb..03599d5ab3 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -44,9 +44,8 @@ const GLuint glprim[8] = { u8 decoded[65536 * 32]; VertexDecoder dec; -uint16_t decIndex[65536]; // Unused +uint16_t decIndex[65536]; int numVerts; -int numInds; IndexGenerator indexGen; @@ -577,6 +576,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void GLES_GPU::InitTransform() { indexGen.Setup(decIndex); + numVerts = 0; } void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) @@ -585,13 +585,18 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (!indexGen.PrimCompatible(prim)) Flush(); + if (!indexGen.Empty()) { + gpuStats.numJoins++; + } gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; + indexGen.SetIndex(numVerts); int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing dec.SetVertexType(gstate.vertType); - dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + dec.DecodeVerts(decoded + numVerts * (int)dec.GetDecVtxFmt().stride, 
verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + numVerts += indexUpperBound - indexLowerBound + 1; if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); @@ -613,7 +618,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_VTYPE_IDX_8BIT: switch (prim) { - case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; @@ -677,6 +682,8 @@ void GLES_GPU::Flush() LinkedShader *program = shaderManager_->ApplyShader(prim); + DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, numVerts); + if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); @@ -687,4 +694,5 @@ void GLES_GPU::Flush() } indexGen.Reset(); -} \ No newline at end of file + numVerts = 0; +} diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 953b5c684d..0c41881839 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -88,19 +88,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim) id->d[1] |= ((gstate.ltype[i] >> 8) & 3) << (i * 4 + 2); } id->d[1] |= (gstate.materialupdate & 7) << 16; + id->d[1] |= (gstate.lightingEnable & 1) << 19; + for (int i = 0; i < 4; i++) { + id->d[1] |= (gstate.lightEnable[i] & 1) << (20 + i); + } } - - // Bits that we will need: - // lightenable * 4 - // lighttype * 4 - // lightcomp * 4 - // uv gen: - // mapping type - // texshade light 
choices (ONLY IF uv mapping type is shade) -} - -void WriteLight(char *p, int l) { - // TODO } const char *boneWeightAttrDecl[8] = { @@ -131,7 +123,6 @@ enum DoLightComputation { LIGHT_FULL, }; - char *GenerateVertexShader(int prim) { char *p = buffer; @@ -316,7 +307,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n"); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i, i); /* two %i conversions need two arguments: both name the same per-light dot variable */ } if (doLight[i] == LIGHT_DOTONLY) @@ -400,10 +391,6 @@ char *GenerateVertexShader(int prim) WRITE(p, " v_depth = gl_Position.z;\n"); WRITE(p, "}\n"); - // DEBUG_LOG(HLE, "\n%s", buffer); -#if defined(_WIN32) && defined(_DEBUG) - OutputDebugString(buffer); -#endif return buffer; } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index e031f6bc71..4e271b4278 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -255,18 +255,23 @@ struct GPUStatistics memset(this, 0, sizeof(*this)); } void resetFrame() { + numJoins = 0; numDrawCalls = 0; numVertsTransformed = 0; numTextureSwitches = 0; numShaderSwitches = 0; + numFlushes = 0; + numTexturesDecoded = 0; } // Per frame statistics + int numJoins; int numDrawCalls; int numFlushes; int numVertsTransformed; int numTextureSwitches; int numShaderSwitches; + int numTexturesDecoded; // Total statistics, updated by the GPU core in UpdateStats int numFrames; diff --git a/native b/native index 0de5e114f3..ff60f2341b 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 0de5e114f337859a03d0763c30beaf6e03af03c4 +Subproject commit ff60f2341b31d3a8764641c9bee5b824c1090b2a