From 36b8ed8a273f8469a4cfb785de8b8bbab6960c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 13 Apr 2014 17:14:45 +0200 Subject: [PATCH 01/21] Mac buildfix --- CMakeLists.txt | 19 ++++++++++--------- native | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d1f080fdec..bcc45c29e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,15 +48,6 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") if (NOT MOBILE_DEVICE) set(USE_FFMPEG ON) endif() - if (NOT ARM) - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_X64") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_X64") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_IX86") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_IX86") - endif() - endif() endif() if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") @@ -64,6 +55,16 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(USE_FFMPEG ON) endif() +if (NOT ARM AND NOT MIPS) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_X64") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_X64") + else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_IX86") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_IX86") + endif() +endif() + if(NOT DEFINED HEADLESS) set(HEADLESS OFF) endif() diff --git a/native b/native index dc1e691ea8..3c13a90099 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit dc1e691ea8e70778d6559366d3e6372b31fe0bf5 +Subproject commit 3c13a9009999bf883c4c608d8b4f30a7d0c75560 From 0d59fc0d2e1f6481b041b9d1fb630969cf414dfa Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 08:15:08 -0700 Subject: [PATCH 02/21] Oops, forgot to reset the frame when unchanged. Fixes #5861. --- GPU/GLES/Framebuffer.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 7463bdd654..29e02bca0f 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -731,17 +731,22 @@ void FramebufferManager::DoSetRenderFrameBuffer() { } } - if (vfb && (drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) { - // If it's newly wrong, or changing every frame, just keep track. - if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) { - vfb->newWidth = drawing_width; - vfb->newHeight = drawing_height; + if (vfb) { + if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) { + // If it's newly wrong, or changing every frame, just keep track. + if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) { + vfb->newWidth = drawing_width; + vfb->newHeight = drawing_height; + vfb->lastFrameNewSize = gpuStats.numFlips; + } else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) { + // Okay, it's changed for a while (and stayed that way.) Let's start over. + DestroyFramebuf(vfb); + vfbs_.erase(vfbs_.begin() + i); + vfb = NULL; + } + } else { + // It's not different, let's keep track of that too. vfb->lastFrameNewSize = gpuStats.numFlips; - } else if (vfb->lastFrameNewSize + FBO_OLD_AGE <= gpuStats.numFlips) { - // Okay, it's changed for a while (and stayed that way.) Let's start over. - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i); - vfb = NULL; } } From 22a80fb7be32492c928e3d5a9df5c293d27fde40 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 09:01:14 -0700 Subject: [PATCH 03/21] softgpu: Avoid multithreading thin polygons. Small performance improvement (5-6% in some areas.) --- GPU/Software/Rasterizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 30187ecec5..264cf2c227 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1314,12 +1314,12 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int range = (maxY - minY) / 16 + 1; if (gstate.isModeClear()) { - if (range >= 24) + if (range >= 24 && (maxX - minX) >= 24 * 16) GlobalThreadPool::Loop(std::bind(&DrawTriangleSlice, v0, v1, v2, minX, minY, maxX, maxY, placeholder::_1, placeholder::_2), 0, range); else DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, 0, range); } else { - if (range >= 24) + if (range >= 24 && (maxX - minX) >= 24 * 16) GlobalThreadPool::Loop(std::bind(&DrawTriangleSlice, v0, v1, v2, minX, minY, maxX, maxY, placeholder::_1, placeholder::_2), 0, range); else DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, 0, range); From e780a645cb524a3e85c92df6de567d2e4bead81c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 10:23:21 -0700 Subject: [PATCH 04/21] Use the FLAG_EXECUTEONCHANGE flag. Improves fps by 1-3% in some games, seems to be a win after all. --- GPU/GLES/GLES_GPU.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 266e54ad35..a4b73d8125 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -46,7 +46,7 @@ enum { FLAG_FLUSHBEFORE = 1, FLAG_FLUSHBEFOREONCHANGE = 2, FLAG_EXECUTE = 4, // needs to actually be executed. unused for now. - FLAG_EXECUTEONCHANGE = 8, // unused for now. not sure if checking for this will be more expensive than doing it. + FLAG_EXECUTEONCHANGE = 8, FLAG_ANY_EXECUTE = 4 | 8, FLAG_READS_PC = 16, FLAG_WRITES_PC = 32, @@ -627,7 +627,7 @@ void GLES_GPU::FastRunLoop(DisplayList &list) { transformDraw_.Flush(); } gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0... - if (cmdFlags & FLAG_ANY_EXECUTE) { // (cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) { + if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) { ExecuteOpInternal(op, diff); } list.pc += 4; @@ -658,7 +658,7 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) { } inline void GLES_GPU::CheckFlushOp(int cmd, u32 diff) { - u8 cmdFlags = commandFlags_[cmd]; + const u8 cmdFlags = commandFlags_[cmd]; if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) { if (dumpThisFrame_) { NOTICE_LOG(G3D, "================ FLUSH ================"); @@ -672,7 +672,11 @@ void GLES_GPU::PreExecuteOp(u32 op, u32 diff) { } void GLES_GPU::ExecuteOp(u32 op, u32 diff) { - return ExecuteOpInternal(op, diff); + const u8 cmd = op >> 24; + const u8 cmdFlags = commandFlags_[cmd]; + if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) { + ExecuteOpInternal(op, diff); + } } void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { From f2ff8544c7de920fa99644f7c22121e85588f41d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 10:25:14 -0700 Subject: [PATCH 05/21] Don't check diff in ExecuteOpInternal(). Since it's not called if !diff, there's no need. Small performance improvement. --- GPU/GLES/GLES_GPU.cpp | 159 +++++++++++++++--------------------------- 1 file changed, 58 insertions(+), 101 deletions(-) diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index a4b73d8125..ea7b6d8e37 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -887,30 +887,26 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_VERTEXTYPE: - if (diff) { - if (!g_Config.bSoftwareSkinning) { + if (!g_Config.bSoftwareSkinning) { + if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); + } else { + // Don't flush when weight count changes, unless morph is enabled. + if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + // Restore and flush + gstate.vertType ^= diff; + Flush(); + gstate.vertType ^= diff; if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } else { - // Don't flush when weight count changes, unless morph is enabled. - if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - // Restore and flush - gstate.vertType ^= diff; - Flush(); - gstate.vertType ^= diff; - if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } } } break; case GE_CMD_REGION1: case GE_CMD_REGION2: - if (diff) { - gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; - } + gstate_c.framebufChanged = true; + gstate_c.textureChanged = true; break; case GE_CMD_CLIPENABLE: @@ -922,26 +918,19 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_TEXTUREMAPENABLE: - if (diff) - gstate_c.textureChanged = true; + gstate_c.textureChanged = true; break; case GE_CMD_LIGHTINGENABLE: break; case GE_CMD_FOGCOLOR: - if (diff) - shaderManager_->DirtyUniform(DIRTY_FOGCOLOR); + shaderManager_->DirtyUniform(DIRTY_FOGCOLOR); break; case GE_CMD_FOG1: - if (diff) - shaderManager_->DirtyUniform(DIRTY_FOGCOEF); - break; - case GE_CMD_FOG2: - if (diff) - shaderManager_->DirtyUniform(DIRTY_FOGCOEF); + shaderManager_->DirtyUniform(DIRTY_FOGCOEF); break; case GE_CMD_FOGENABLE: @@ -957,39 +946,29 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_TEXSCALEU: - if (diff) { - gstate_c.uv.uScale = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } + gstate_c.uv.uScale = getFloat24(data); + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: - if (diff) { - gstate_c.uv.vScale = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } + gstate_c.uv.vScale = getFloat24(data); + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: - if (diff) { - gstate_c.uv.uOff = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } + gstate_c.uv.uOff = getFloat24(data); + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: - if (diff) { - gstate_c.uv.vOff = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } + gstate_c.uv.vOff = getFloat24(data); + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_SCISSOR1: case GE_CMD_SCISSOR2: - if (diff) { - gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; - } + gstate_c.framebufChanged = true; + gstate_c.textureChanged = true; break; /// @@ -1000,13 +979,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_FRAMEBUFPTR: case GE_CMD_FRAMEBUFWIDTH: case GE_CMD_FRAMEBUFPIXFORMAT: - if (diff) { - gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; - } + gstate_c.framebufChanged = true; + gstate_c.textureChanged = true; break; case GE_CMD_TEXADDR0: + gstate_c.textureChanged = true; + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); + break; + case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: case GE_CMD_TEXADDR3: @@ -1014,10 +995,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: - if (diff) { - gstate_c.textureChanged = true; - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } + gstate_c.textureChanged = true; break; case GE_CMD_TEXBUFWIDTH0: @@ -1028,15 +1006,11 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: - if (diff) { - gstate_c.textureChanged = true; - } + gstate_c.textureChanged = true; break; case GE_CMD_CLUTFORMAT: - if (diff) { - gstate_c.textureChanged = true; - } + gstate_c.textureChanged = true; // This could be used to "dirty" textures with clut. break; @@ -1052,9 +1026,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_TEXMAPMODE: - if (diff) { - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); - } + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSHADELS: @@ -1093,7 +1065,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { // We will need to reset the texture now. gstate_c.textureChanged = true; } - //fall thru - ignoring the mipmap sizes for now + break; case GE_CMD_TEXSIZE1: case GE_CMD_TEXSIZE2: @@ -1102,9 +1074,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXSIZE5: case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: - if (diff) { - gstate_c.textureChanged = true; - } + gstate_c.textureChanged = true; break; case GE_CMD_ZBUFPTR: @@ -1113,30 +1083,25 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_AMBIENTCOLOR: case GE_CMD_AMBIENTALPHA: - if (diff) - shaderManager_->DirtyUniform(DIRTY_AMBIENT); + shaderManager_->DirtyUniform(DIRTY_AMBIENT); break; case GE_CMD_MATERIALDIFFUSE: - if (diff) - shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE); + shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: - if (diff) - shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE); + shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE); break; case GE_CMD_MATERIALAMBIENT: case GE_CMD_MATERIALALPHA: - if (diff) - shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA); + shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULAR: case GE_CMD_MATERIALSPECULARCOEF: - if (diff) - shaderManager_->DirtyUniform(DIRTY_MATSPECULAR); + shaderManager_->DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_LIGHTTYPE0: @@ -1149,7 +1114,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1: case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: - if (diff) { + { int n = cmd - GE_CMD_LX0; int l = n / 3; int c = n % 3; @@ -1162,7 +1127,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1: case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: - if (diff) { + { int n = cmd - GE_CMD_LDX0; int l = n / 3; int c = n % 3; @@ -1175,7 +1140,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1: case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: - if (diff) { + { int n = cmd - GE_CMD_LKA0; int l = n / 3; int c = n % 3; @@ -1188,7 +1153,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_LKS1: case GE_CMD_LKS2: case GE_CMD_LKS3: - if (diff) { + { int l = cmd - GE_CMD_LKS0; gstate_c.lightspotCoef[l] = getFloat24(data); shaderManager_->DirtyUniform(DIRTY_LIGHT0 << l); @@ -1199,7 +1164,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_LKO1: case GE_CMD_LKO2: case GE_CMD_LKO3: - if (diff) { + { int l = cmd - GE_CMD_LKO0; gstate_c.lightangle[l] = getFloat24(data); shaderManager_->DirtyUniform(DIRTY_LIGHT0 << l); @@ -1209,7 +1174,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_LAC0:case GE_CMD_LAC1:case GE_CMD_LAC2:case GE_CMD_LAC3: case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: - if (diff) { + { float r = (float)(data & 0xff) * (1.0f / 255.0f); float g = (float)((data >> 8) & 0xff) * (1.0f / 255.0f); float b = (float)(data >> 16) * (1.0f / 255.0f); @@ -1229,10 +1194,8 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_VIEWPORTY2: case GE_CMD_VIEWPORTZ1: case GE_CMD_VIEWPORTZ2: - if (diff) { - gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; - } + gstate_c.framebufChanged = true; + gstate_c.textureChanged = true; break; case GE_CMD_LIGHTENABLE0: @@ -1284,16 +1247,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { if (((data >> 16) & 0xFF) != 0xFF && (data & 7) > 1) WARN_LOG_REPORT_ONCE(alphatestmask, G3D, "Unsupported alphatest mask: %02x", (data >> 16) & 0xFF); #endif - // Intentional fallthrough - we still need to dirty DIRTY_ALPHACOLORREF for GE_CMD_ALPHATEST. + shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); + break; case GE_CMD_COLORREF: - if (diff) - shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); + shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXENVCOLOR: - if (diff) - shaderManager_->DirtyUniform(DIRTY_TEXENV); + shaderManager_->DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXFUNC: @@ -1304,8 +1266,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXFORMAT: case GE_CMD_TEXFILTER: case GE_CMD_TEXWRAP: - if (diff) - gstate_c.textureChanged = true; + gstate_c.textureChanged = true; break; ////////////////////////////////////////////////////////////////// @@ -1325,8 +1286,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_MORPHWEIGHT5: case GE_CMD_MORPHWEIGHT6: case GE_CMD_MORPHWEIGHT7: - if (diff) - gstate_c.morphWeights[cmd - GE_CMD_MORPHWEIGHT0] = getFloat24(data); + gstate_c.morphWeights[cmd - GE_CMD_MORPHWEIGHT0] = getFloat24(data); break; case GE_CMD_DITH0: @@ -1585,8 +1545,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { #endif case GE_CMD_TEXLEVEL: - if (diff) - gstate_c.textureChanged = true; + gstate_c.textureChanged = true; break; ////////////////////////////////////////////////////////////////// @@ -1595,9 +1554,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_STENCILTEST: // Handled in StateMapping. - if (diff) { - shaderManager_->DirtyUniform(DIRTY_STENCILREPLACEVALUE); - } + shaderManager_->DirtyUniform(DIRTY_STENCILREPLACEVALUE); break; case GE_CMD_STENCILTESTENABLE: From b6dc7eba8b199236d609bf3fba5f7cfeec8b2531 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 12:13:02 -0700 Subject: [PATCH 06/21] Simplify texture status checks a bit. --- GPU/GLES/TextureCache.cpp | 34 ++++++++++++++++------------------ GPU/GLES/TextureCache.h | 12 ++++++++++++ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index b69193105a..394100bed8 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -145,9 +145,8 @@ void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) { u32 texEnd = iter->second.addr + iter->second.sizeInRAM; if (texAddr < addr_end && addr < texEnd) { - if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) { - // Clear status -> STATUS_HASHING. - iter->second.status &= ~TexCacheEntry::STATUS_MASK; + if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) { + iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING); } if (type != GPU_INVALIDATE_ALL) { gpuStats.numTextureInvalidations++; @@ -168,9 +167,8 @@ void TextureCache::InvalidateAll(GPUInvalidationType /*unused*/) { } for (TexCache::iterator iter = cache.begin(), end = cache.end(); iter != end; ++iter) { - if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) { - // Clear status -> STATUS_HASHING. - iter->second.status &= ~TexCacheEntry::STATUS_MASK; + if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) { + iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING); } if (!iter->second.framebuffer) { iter->second.invalidHint++; @@ -921,7 +919,7 @@ void TextureCache::SetTexture(bool force) { } } - bool rehash = (entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE; + bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE; bool doDelete = true; if (match) { @@ -956,14 +954,14 @@ void TextureCache::SetTexture(bool force) { rehash = false; } - if (rehash && (entry->status & TexCacheEntry::STATUS_MASK) != TexCacheEntry::STATUS_RELIABLE) { + if (rehash && entry->GetHashStatus() != TexCacheEntry::STATUS_RELIABLE) { fullhash = QuickTexHash(texaddr, bufw, w, h, format); if (fullhash != entry->fullhash) { hashFail = true; - } else if ((entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) { + } else if (entry->GetHashStatus() != TexCacheEntry::STATUS_HASHING && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) { // Reset to STATUS_HASHING. if (g_Config.bTextureBackoffCache) { - entry->status &= ~TexCacheEntry::STATUS_MASK; + entry->SetHashStatus(TexCacheEntry::STATUS_HASHING); } } } @@ -1009,7 +1007,7 @@ void TextureCache::SetTexture(bool force) { if (entry->texture != lastBoundTexture) { glBindTexture(GL_TEXTURE_2D, entry->texture); lastBoundTexture = entry->texture; - gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL; + gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL; } UpdateSamplingParams(*entry, false); VERBOSE_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); @@ -1031,8 +1029,8 @@ void TextureCache::SetTexture(bool force) { } } // Clear the reliable bit if set. - if ((entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) { - entry->status &= ~TexCacheEntry::STATUS_MASK; + if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) { + entry->SetHashStatus(TexCacheEntry::STATUS_HASHING); } } } else { @@ -1229,7 +1227,7 @@ void TextureCache::SetTexture(bool force) { //glPixelStorei(GL_PACK_ROW_LENGTH, 0); glPixelStorei(GL_PACK_ALIGNMENT, 1); - gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL; + gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL; } GLenum TextureCache::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const { @@ -1537,11 +1535,11 @@ void TextureCache::CheckAlpha(TexCacheEntry &entry, u32 *pixelData, GLenum dstFm } if (hitSomeAlpha != 0) - entry.status |= TexCacheEntry::STATUS_ALPHA_UNKNOWN; + entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN); else if (hitZeroAlpha != 0) - entry.status |= TexCacheEntry::STATUS_ALPHA_SIMPLE; + entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_SIMPLE); else - entry.status |= TexCacheEntry::STATUS_ALPHA_FULL; + entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_FULL); } void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, GLenum dstFmt) { @@ -1578,7 +1576,7 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) CheckAlpha(entry, pixelData, dstFmt, useUnpack ? bufw : w, w, h); else - entry.status |= TexCacheEntry::STATUS_ALPHA_UNKNOWN; + entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index ec0c1006ad..0dfa7baf42 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -116,6 +116,18 @@ private: bool sClamp; bool tClamp; + Status GetHashStatus() { + return Status(status & STATUS_MASK); + } + void SetHashStatus(Status newStatus) { + status = (status & ~STATUS_MASK) | newStatus; + } + Status GetAlphaStatus() { + return Status(status & STATUS_ALPHA_MASK); + } + void SetAlphaStatus(Status newStatus) { + status = (status & ~STATUS_ALPHA_MASK) | newStatus; + } bool Matches(u16 dim2, u8 format2, int maxLevel2); }; From 2f72da8087acfd67b6fe73b53605c9132abf3b6f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 13:01:18 -0700 Subject: [PATCH 07/21] Don't dirty the texture on texmapenable. Tales of Phantasia X flips on and off texturing during battle transitions. This causes tons of needless rehashes. --- GPU/GLES/GLES_GPU.cpp | 4 ++-- GPU/GLES/StateMapping.cpp | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index ea7b6d8e37..fec550719c 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -115,7 +115,7 @@ static const CommandTableEntry commandTable[] = { // These affect the fragment shader so need flushing. {GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE}, - {GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE}, + {GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE}, {GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE}, @@ -918,7 +918,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_TEXTUREMAPENABLE: - gstate_c.textureChanged = true; + // Don't need to dirty the texture here, already dirtied at list start/etc. break; case GE_CMD_LIGHTINGENABLE: diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 179039094f..9e8a6f1e4a 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -166,10 +166,8 @@ static inline bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margi void TransformDrawEngine::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. - if (gstate_c.textureChanged && !gstate.isModeClear()) { - if (gstate.isTextureMapEnabled()) { - textureCache_->SetTexture(); - } + if (gstate_c.textureChanged && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { + textureCache_->SetTexture(); gstate_c.textureChanged = false; } From a7587815440c02c09e1e13840048606e7e433097 Mon Sep 17 00:00:00 2001 From: Klimis Ioannidis Date: Sun, 13 Apr 2014 23:25:56 +0300 Subject: [PATCH 08/21] Blacklist GLTools (Chainfire3D-like app for 3.0+) --- UI/EmuScreen.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 95643f06cd..6658d60672 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -138,6 +138,8 @@ void EmuScreen::bootComplete() { const char *renderer = (const char*)glGetString(GL_RENDERER); if (strstr(renderer, "Chainfire3D") != 0) { osm.Show(s->T("Chainfire3DWarning", "WARNING: Chainfire3D detected, may cause problems"), 10.0f, 0xFF30a0FF, -1, true); + } else if (strstr(renderer, "GLTools") != 0) { + osm.Show(s->T("GLToolsWarning", "WARNING: GLTools detected, may cause problems"), 10.0f, 0xFF30a0FF, -1, true); } System_SendMessage("event", "startgame"); From a53ecd7da31f4b48664942ba44354c1040f4ba18 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 13 Apr 2014 23:22:17 +0200 Subject: [PATCH 09/21] More elegant way of solving #5839 (d_private in sceIoDread) We flag filesystems as being FAT32 instead of checking for "ms0:". --- Core/FileSystems/DirectoryFileSystem.cpp | 10 +++---- Core/FileSystems/DirectoryFileSystem.h | 5 ++-- Core/FileSystems/FileSystem.h | 8 +++++- Core/FileSystems/ISOFileSystem.h | 1 + Core/FileSystems/MetaFileSystem.cpp | 11 ++++++++ Core/FileSystems/MetaFileSystem.h | 2 ++ Core/FileSystems/VirtualDiscFileSystem.h | 1 + Core/HLE/sceIo.cpp | 33 ++++++++++++++++++++---- 8 files changed, 58 insertions(+), 13 deletions(-) diff --git a/Core/FileSystems/DirectoryFileSystem.cpp b/Core/FileSystems/DirectoryFileSystem.cpp index 9586d1f580..d887ccb005 100644 --- a/Core/FileSystems/DirectoryFileSystem.cpp +++ b/Core/FileSystems/DirectoryFileSystem.cpp @@ -134,6 +134,11 @@ bool FixPathCase(std::string& basePath, std::string &path, FixPathCaseBehavior b #endif +DirectoryFileSystem::DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath, int _flags) : basePath(_basePath), flags(_flags) { + File::CreateFullPath(basePath); + hAlloc = _hAlloc; +} + std::string DirectoryFileHandle::GetLocalPath(std::string& basePath, std::string localpath) { if (localpath.empty()) @@ -326,11 +331,6 @@ void DirectoryFileHandle::Close() #endif } -DirectoryFileSystem::DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath) : basePath(_basePath) { - File::CreateFullPath(basePath); - hAlloc = _hAlloc; -} - void DirectoryFileSystem::CloseAll() { for (auto iter = entries.begin(); iter != entries.end(); ++iter) { iter->second.hFile.Close(); diff --git a/Core/FileSystems/DirectoryFileSystem.h b/Core/FileSystems/DirectoryFileSystem.h index 1f71d7315e..bfce1ec896 100644 --- a/Core/FileSystems/DirectoryFileSystem.h +++ b/Core/FileSystems/DirectoryFileSystem.h @@ -86,7 +86,7 @@ struct DirectoryFileHandle class DirectoryFileSystem : public IFileSystem { public: - DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath); + DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath, int _flags = 0); ~DirectoryFileSystem(); void CloseAll(); @@ -108,6 +108,7 @@ public: int RenameFile(const std::string &from, const std::string &to); bool RemoveFile(const std::string &filename); bool GetHostPath(const std::string &inpath, std::string &outpath); + int Flags() { return flags; } private: struct OpenFileEntry { @@ -120,7 +121,7 @@ private: EntryMap entries; std::string basePath; IHandleAllocator *hAlloc; - + int flags; // In case of Windows: Translate slashes, etc. std::string GetLocalPath(std::string localpath); }; diff --git a/Core/FileSystems/FileSystem.h b/Core/FileSystems/FileSystem.h index 9f938aa6ef..82855b9b44 100644 --- a/Core/FileSystems/FileSystem.h +++ b/Core/FileSystems/FileSystem.h @@ -50,6 +50,10 @@ enum DevType PSP_DEV_TYPE_ALIAS = 0x20, }; +enum FileSystemFlags +{ + FILESYSTEM_SIMULATE_FAT32 = 1, +}; class IHandleAllocator { public: @@ -112,6 +116,7 @@ public: virtual bool GetHostPath(const std::string &inpath, std::string &outpath) = 0; virtual int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec) = 0; virtual int DevType(u32 handle) = 0; + virtual int Flags() = 0; }; @@ -133,7 +138,8 @@ public: virtual bool RemoveFile(const std::string &filename) {return false;} virtual bool GetHostPath(const std::string &inpath, std::string &outpath) {return false;} virtual int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec) {return SCE_KERNEL_ERROR_ERRNO_FUNCTION_NOT_SUPPORTED; } - virtual int DevType(u32 handle) {return 0;} + virtual int DevType(u32 handle) { return 0; } + virtual int Flags() { return 0; } }; diff --git a/Core/FileSystems/ISOFileSystem.h b/Core/FileSystems/ISOFileSystem.h index 78ae4003b9..f124dfdf43 100644 --- a/Core/FileSystems/ISOFileSystem.h +++ b/Core/FileSystems/ISOFileSystem.h @@ -41,6 +41,7 @@ public: bool OwnsHandle(u32 handle); int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec); int DevType(u32 handle); + int Flags() { return 0; } size_t WriteFile(u32 handle, const u8 *pointer, s64 size); bool GetHostPath(const std::string &inpath, std::string &outpath) {return false;} diff --git a/Core/FileSystems/MetaFileSystem.cpp b/Core/FileSystems/MetaFileSystem.cpp index cec688554b..1c7197cd33 100644 --- a/Core/FileSystems/MetaFileSystem.cpp +++ b/Core/FileSystems/MetaFileSystem.cpp @@ -255,6 +255,10 @@ std::string MetaFileSystem::NormalizePrefix(std::string prefix) const { if (startsWith(prefix, "host")) prefix = "host0:"; + // Should we simply make this case insensitive? + if (prefix == "DISC0:") + prefix = "disc0:"; + return prefix; } @@ -284,6 +288,13 @@ void MetaFileSystem::Remount(IFileSystem *oldSystem, IFileSystem *newSystem) { } } +IFileSystem *MetaFileSystem::GetSystemFromFilename(const std::string &filename) { + size_t prefixPos = filename.find(':'); + if (prefixPos == filename.npos) + return 0; + return GetSystem(filename.substr(0, prefixPos + 1)); +} + IFileSystem *MetaFileSystem::GetSystem(const std::string &prefix) { for (auto it = fileSystems.begin(); it != fileSystems.end(); ++it) { if (it->prefix == NormalizePrefix(prefix)) diff --git a/Core/FileSystems/MetaFileSystem.h b/Core/FileSystems/MetaFileSystem.h index bae0408b92..7d9eb8ba4d 100644 --- a/Core/FileSystems/MetaFileSystem.h +++ b/Core/FileSystems/MetaFileSystem.h @@ -54,6 +54,7 @@ public: void Remount(IFileSystem *oldSystem, IFileSystem *newSystem); IFileSystem *GetSystem(const std::string &prefix); + IFileSystem *GetSystemFromFilename(const std::string &filename); void ThreadEnded(int threadID); @@ -106,6 +107,7 @@ public: virtual bool RemoveFile(const std::string &filename); virtual int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec); virtual int DevType(u32 handle); + virtual int Flags() { return 0; } // Convenience helper - returns < 0 on failure. int ReadEntireFile(const std::string &filename, std::vector &data); diff --git a/Core/FileSystems/VirtualDiscFileSystem.h b/Core/FileSystems/VirtualDiscFileSystem.h index 09a3d488e1..42b09abb81 100644 --- a/Core/FileSystems/VirtualDiscFileSystem.h +++ b/Core/FileSystems/VirtualDiscFileSystem.h @@ -40,6 +40,7 @@ public: int DevType(u32 handle); bool GetHostPath(const std::string &inpath, std::string &outpath); std::vector GetDirListing(std::string path); + int Flags() { return 0; } // unsupported operations size_t WriteFile(u32 handle, const u8 *pointer, s64 size); diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index ee4aae5443..a1aad26ee0 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -457,7 +457,7 @@ void __IoInit() { asyncNotifyEvent = CoreTiming::RegisterEvent("IoAsyncNotify", __IoAsyncNotify); syncNotifyEvent = CoreTiming::RegisterEvent("IoSyncNotify", __IoSyncNotify); - memstickSystem = new DirectoryFileSystem(&pspFileSystem, g_Config.memCardDirectory); + memstickSystem = new DirectoryFileSystem(&pspFileSystem, g_Config.memCardDirectory, FILESYSTEM_SIMULATE_FAT32); #if defined(USING_WIN_UI) || defined(APPLE) flash0System = new DirectoryFileSystem(&pspFileSystem, g_Config.flash0Directory); #else @@ -716,7 +716,7 @@ u32 npdrmRead(FileNode *f, u8 *data, int size) { memcpy(data, pgd->block_buf+offset, copy_size); block += 1; offset = 0; - }else{ + } else { copy_size = remain_size; memcpy(data, pgd->block_buf+offset, copy_size); } @@ -1925,6 +1925,19 @@ u32 sceIoDopen(const char *path) { return id; } +// For some reason strncpy will fill up the entire output buffer. No reason to do that, +// so we use this trivial replacement. +static void strcpy_limit(char *dest, const char *src, int count) { + int i; + for (i = 0; i < count; i++) { + if (!src[i]) // Do the check afterwards, so we don't exit before copying the null terminator. + break; + dest[i] = src[i]; + } + // Always null terminate. + dest[i] = 0; +} + u32 sceIoDread(int id, u32 dirent_addr) { u32 error; DirListing *dir = kernelObjects.Get(id, error); @@ -1943,8 +1956,15 @@ u32 sceIoDread(int id, u32 dirent_addr) { strncpy(entry->d_name, info.name.c_str(), 256); entry->d_name[255] = '\0'; + bool isFAT = false; + IFileSystem *sys = pspFileSystem.GetSystemFromFilename(dir->name); + if (sys && (sys->Flags() & FILESYSTEM_SIMULATE_FAT32)) + isFAT = true; + else + isFAT = false; + // Only write d_private for memory stick - if (dir->name.substr(0, 3) == "ms0") { + if (isFAT) { // write d_private for supporting Custom BGM // ref JPCSP https://code.google.com/p/jpcsp/source/detail?r=3468 if (Memory::IsValidAddress(entry->d_private)){ @@ -1952,7 +1972,9 @@ u32 sceIoDread(int id, u32 dirent_addr) { // d_private is pointing to an area of unknown size // - [0..12] "8.3" file name (null-terminated), could be empty. // - [13..???] long file name (null-terminated) - strncpy((char*)Memory::GetPointer(entry->d_private + 13), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name)); + + // Hm, so currently we don't write the short name at all to d_private? TODO + strcpy_limit((char*)Memory::GetPointer(entry->d_private + 13), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name)); } else { // d_private is pointing to an area of total size 1044 @@ -1960,8 +1982,9 @@ u32 sceIoDread(int id, u32 dirent_addr) { // - [4..19] "8.3" file name (null-terminated), could be empty. // - [20..???] long file name (null-terminated) auto size = Memory::Read_U32(entry->d_private); + // Hm, so currently we don't write the short name at all to d_private? TODO if (size >= 1044) { - strncpy((char*)Memory::GetPointer(entry->d_private + 20), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name)); + strcpy_limit((char*)Memory::GetPointer(entry->d_private + 20), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name)); } } } From 4c1c694d4b5072e69f1b989165b566cd2c0bdca1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 14:02:00 -0700 Subject: [PATCH 10/21] Use flags to avoid hashing textures when unchanged. If only parameters change (like wrapping or clut, etc.) we don't need to rehash the data - we know it hasn't changed. Should reduce the distance between lazy texture hashing on and off. --- Core/HLE/sceDisplay.cpp | 2 +- GPU/Directx9/FramebufferDX9.cpp | 4 +- GPU/Directx9/GPU_DX9.cpp | 24 ++++----- GPU/Directx9/StateMappingDX9.cpp | 4 +- GPU/GLES/Framebuffer.cpp | 12 +++-- GPU/GLES/GLES_GPU.cpp | 66 ++++++++++++++++------- GPU/GLES/StateMapping.cpp | 4 +- GPU/GLES/TextureCache.cpp | 5 ++ GPU/GPUCommon.cpp | 2 +- GPU/GPUState.cpp | 91 ++++++++++++++++++++++++++++++++ GPU/GPUState.h | 11 +++- GPU/Null/NullGpu.cpp | 6 +-- 12 files changed, 186 insertions(+), 45 deletions(-) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index b7698b8d52..d4a0bdf3f8 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -226,7 +226,7 @@ void __DisplayDoState(PointerWrap &p) { CoreTiming::RestoreRegisterEvent(afterFlipEvent, "AfterFlip", &hleAfterFlip); p.Do(gstate); - p.Do(gstate_c); + gstate_c.DoState(p); #ifndef _XBOX if (s < 2) { // This shouldn't have been savestated anyway, but it was. diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 04d5db5712..92a4bec3a0 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -428,7 +428,7 @@ void FramebufferManagerDX9::SetRenderFrameBuffer() { // None found? Create one. if (!vfb) { - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; vfb = new VirtualFramebufferDX9(); vfb->fbo = 0; vfb->fb_address = fb_address; @@ -512,7 +512,7 @@ void FramebufferManagerDX9::SetRenderFrameBuffer() { // Use it as a render target. DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); vfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 59ee9b712c..724f543f43 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -520,7 +520,7 @@ void DIRECTX9_GPU::CopyDisplayToOutputInternal() { shaderManager_->EndFrame(); - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; } // Maybe should write this in ASM... @@ -737,7 +737,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_REGION2: if (diff) { gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; } break; @@ -751,7 +751,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXTUREMAPENABLE: if (diff) - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; break; case GE_CMD_LIGHTINGENABLE: @@ -829,7 +829,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_FRAMEBUFPIXFORMAT: if (diff) { gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; } break; @@ -841,7 +841,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; @@ -853,18 +853,18 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; break; case GE_CMD_CLUTADDR: case GE_CMD_CLUTADDRUPPER: case GE_CMD_CLUTFORMAT: - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; // This could be used to "dirty" textures with clut. break; case GE_CMD_LOADCLUT: - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; textureCache_.LoadClut(); // This could be used to "dirty" textures with clut. break; @@ -897,7 +897,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { DoBlockTransfer(); // Fixes Gran Turismo's funky text issue. - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; break; } @@ -913,7 +913,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXSIZE5: case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; break; case GE_CMD_ZBUFPTR: @@ -1046,7 +1046,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_VIEWPORTZ2: if (diff) { gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; } break; @@ -1381,7 +1381,7 @@ void DIRECTX9_GPU::DoState(PointerWrap &p) { textureCache_.Clear(true); transformDraw_.ClearTrackedVertexArrays(); - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; framebufferManager_.DestroyAllFBOs(); shaderManager_->ClearCache(true); } diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 3ffa839563..1b9f57c85d 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -112,11 +112,11 @@ static bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margin = 0.1 void TransformDrawEngineDX9::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. - if (gstate_c.textureChanged) { + if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED) { if (gstate.isTextureMapEnabled()) { textureCache_->SetTexture(); } - gstate_c.textureChanged = false; + gstate_c.textureChanged = TEXCHANGE_UNCHANGED; } // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 29e02bca0f..514bddb5b9 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -755,7 +755,9 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // None found? Create one. if (!vfb) { - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } vfb = new VirtualFramebuffer(); vfb->fbo = 0; vfb->fb_address = fb_address; @@ -859,7 +861,9 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // Use it as a render target. DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); vfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; @@ -1191,7 +1195,9 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s glEnable(GL_DITHER); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } nvfb->last_frame_render = gpuStats.numFlips; nvfb->dirtyAfterDisplay = true; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index fec550719c..a5fad0603a 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -610,7 +610,7 @@ void GLES_GPU::CopyDisplayToOutputInternal() { #endif #endif - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; } // Maybe should write this in ASM... @@ -906,7 +906,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_REGION1: case GE_CMD_REGION2: gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; case GE_CMD_CLIPENABLE: @@ -968,7 +970,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_SCISSOR1: case GE_CMD_SCISSOR2: gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; /// @@ -980,11 +984,13 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_FRAMEBUFWIDTH: case GE_CMD_FRAMEBUFPIXFORMAT: gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; case GE_CMD_TEXADDR0: - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); break; @@ -995,10 +1001,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; case GE_CMD_TEXBUFWIDTH0: + gstate_c.textureChanged = TEXCHANGE_UPDATED; + break; + case GE_CMD_TEXBUFWIDTH1: case GE_CMD_TEXBUFWIDTH2: case GE_CMD_TEXBUFWIDTH3: @@ -1006,11 +1017,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; case GE_CMD_CLUTFORMAT: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } // This could be used to "dirty" textures with clut. break; @@ -1020,7 +1035,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_LOADCLUT: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } textureCache_.LoadClut(); // This could be used to "dirty" textures with clut. break; @@ -1051,19 +1068,21 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { DoBlockTransfer(); // Fixes Gran Turismo's funky text issue, since it overwrites the current texture. - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; break; } case GE_CMD_TEXSIZE0: // Render to texture may have overridden the width/height. // Don't reset it unless the size is different / the texture has changed. - if (diff || gstate_c.textureChanged) { + if (diff || gstate_c.textureChanged != TEXCHANGE_UNCHANGED) { gstate_c.curTextureWidth = gstate.getTextureWidth(0); gstate_c.curTextureHeight = gstate.getTextureHeight(0); shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); // We will need to reset the texture now. - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } } break; @@ -1074,7 +1093,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXSIZE5: case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; case GE_CMD_ZBUFPTR: @@ -1195,7 +1216,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_VIEWPORTZ1: case GE_CMD_VIEWPORTZ2: gstate_c.framebufChanged = true; - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; case GE_CMD_LIGHTENABLE0: @@ -1262,11 +1285,16 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXFLUSH: break; - case GE_CMD_TEXMODE: case GE_CMD_TEXFORMAT: + gstate_c.textureChanged = TEXCHANGE_UPDATED; + break; + + case GE_CMD_TEXMODE: case GE_CMD_TEXFILTER: case GE_CMD_TEXWRAP: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; ////////////////////////////////////////////////////////////////// @@ -1545,7 +1573,9 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { #endif case GE_CMD_TEXLEVEL: - gstate_c.textureChanged = true; + if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { + gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + } break; ////////////////////////////////////////////////////////////////// @@ -1785,7 +1815,7 @@ void GLES_GPU::DoState(PointerWrap &p) { textureCache_.Clear(true); transformDraw_.ClearTrackedVertexArrays(); - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; framebufferManager_.DestroyAllFBOs(); } } diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 9e8a6f1e4a..e6a7eb688c 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -166,9 +166,9 @@ static inline bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margi void TransformDrawEngine::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. - if (gstate_c.textureChanged && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { + if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { textureCache_->SetTexture(); - gstate_c.textureChanged = false; + gstate_c.textureChanged = TEXCHANGE_UNCHANGED; } // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 394100bed8..839140a99d 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -922,6 +922,11 @@ void TextureCache::SetTexture(bool force) { bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE; bool doDelete = true; + // If we only loaded a clut, or changed framebuf or something, we don't need to rehash the texture data. + if (gstate_c.textureChanged == TEXCHANGE_PARAMSONLY) { + rehash = false; + } + if (match) { if (entry->lastFrame != gpuStats.numFlips) { u32 diff = gpuStats.numFlips - entry->lastFrame; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 13510e733c..3f0cf79060 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -648,7 +648,7 @@ void GPUCommon::ProcessDLQueueInternal() { UpdateTickEstimate(std::max(busyTicks, startingTicks + cyclesExecuted)); // Game might've written new texture data. - gstate_c.textureChanged = true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; // Seems to be correct behaviour to process the list anyway? if (startingTicks < busyTicks) { diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 47c0c4dfb3..a2d095a4db 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -27,6 +27,7 @@ #include "GPU/Directx9/helper/global.h" #include "GPU/Directx9/GPU_DX9.h" #endif +#include "Common/ChunkFile.h" #include "Core/CoreParameter.h" #include "Core/Config.h" #include "Core/System.h" @@ -249,3 +250,93 @@ bool vertTypeIsSkinningEnabled(u32 vertType) { else return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); } + +struct GPUStateCache_v0 +{ + u32 vertexAddr; + u32 indexAddr; + + u32 offsetAddr; + + bool textureChanged; + bool textureFullAlpha; + bool vertexFullAlpha; + bool framebufChanged; + + int skipDrawReason; + + UVScale uv; + bool flipTexture; + + float lightpos[4][3]; + float lightdir[4][3]; + float lightatt[4][3]; + float lightColor[3][4][3]; // Ambient Diffuse Specular + float lightangle[4]; // spotlight cone angle (cosine) + float lightspotCoef[4]; // spotlight dropoff + float morphWeights[8]; + + u32 curTextureWidth; + u32 curTextureHeight; + u32 actualTextureHeight; + + float vpWidth; + float vpHeight; + + u32 curRTWidth; + u32 curRTHeight; +}; + +void GPUStateCache::DoState(PointerWrap &p) { + auto s = p.Section("GPUStateCache", 0, 1); + if (!s) { + // Old state, this was not versioned. + GPUStateCache_v0 old; + p.Do(old); + + vertexAddr = old.vertexAddr; + indexAddr = old.indexAddr; + offsetAddr = old.offsetAddr; + textureChanged = TEXCHANGE_UPDATED; + textureFullAlpha = old.textureFullAlpha; + vertexFullAlpha = old.vertexFullAlpha; + framebufChanged = old.framebufChanged; + + const size_t oldOffset = offsetof(GPUStateCache_v0, skipDrawReason); + const size_t newOffset = offsetof(GPUStateCache, skipDrawReason); + memcpy((char *)this + newOffset, (char *)&old + oldOffset, sizeof(old) - oldOffset); + return; + } + + p.Do(vertexAddr); + p.Do(indexAddr); + p.Do(offsetAddr); + + p.Do(textureChanged); + p.Do(textureFullAlpha); + p.Do(vertexFullAlpha); + p.Do(framebufChanged); + + p.Do(skipDrawReason); + + p.Do(uv); + p.Do(flipTexture); + + p.Do(lightpos); + p.Do(lightdir); + p.Do(lightatt); + p.Do(lightColor); + p.Do(lightangle); + p.Do(lightspotCoef); + p.Do(morphWeights); + + p.Do(curTextureWidth); + p.Do(curTextureHeight); + p.Do(actualTextureHeight); + + p.Do(vpWidth); + p.Do(vpHeight); + + p.Do(curRTWidth); + p.Do(curRTHeight); +} diff --git a/GPU/GPUState.h b/GPU/GPUState.h index f7533097e4..b2e4d723ce 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -23,6 +23,8 @@ #include "ge_constants.h" #include "Common/Common.h" +class PointerWrap; + // PSP uses a curious 24-bit float - it's basically the top 24 bits of a regular IEEE754 32-bit float. // This is used for light positions, transform matrices, you name it. inline float getFloat24(unsigned int data) @@ -432,6 +434,12 @@ struct UVScale { float uOff, vOff; }; +enum TextureChangeReason { + TEXCHANGE_UNCHANGED, + TEXCHANGE_UPDATED, + TEXCHANGE_PARAMSONLY, +}; + struct GPUStateCache { u32 vertexAddr; @@ -439,7 +447,7 @@ struct GPUStateCache u32 offsetAddr; - bool textureChanged; + TextureChangeReason textureChanged; bool textureFullAlpha; bool vertexFullAlpha; bool framebufChanged; @@ -468,6 +476,7 @@ struct GPUStateCache u32 curRTHeight; u32 getRelativeAddress(u32 data) const; + void DoState(PointerWrap &p); }; // TODO: Implement support for these. diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index 4f766001a5..88e067f369 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -214,7 +214,7 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) { break; case GE_CMD_TEXADDR0: - gstate_c.textureChanged=true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: case GE_CMD_TEXADDR3: @@ -226,7 +226,7 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) { break; case GE_CMD_TEXBUFWIDTH0: - gstate_c.textureChanged=true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; case GE_CMD_TEXBUFWIDTH1: case GE_CMD_TEXBUFWIDTH2: case GE_CMD_TEXBUFWIDTH3: @@ -314,7 +314,7 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) { } case GE_CMD_TEXSIZE0: - gstate_c.textureChanged=true; + gstate_c.textureChanged = TEXCHANGE_UPDATED; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); //fall thru - ignoring the mipmap sizes for now From c9eb4a1916ddbf07a9a5d3cfa443e52a748b342a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 13 Apr 2014 23:34:08 +0200 Subject: [PATCH 11/21] VFSFileSystem too --- Core/FileSystems/DirectoryFileSystem.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Core/FileSystems/DirectoryFileSystem.h b/Core/FileSystems/DirectoryFileSystem.h index bfce1ec896..7d6a8793bc 100644 --- a/Core/FileSystems/DirectoryFileSystem.h +++ b/Core/FileSystems/DirectoryFileSystem.h @@ -150,6 +150,7 @@ public: int RenameFile(const std::string &from, const std::string &to); bool RemoveFile(const std::string &filename); bool GetHostPath(const std::string &inpath, std::string &outpath); + int Flags() { return 0; } private: struct OpenFileEntry { From f285b12dcfb3f0cecefe915287c7da0893003b31 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 13 Apr 2014 23:43:32 +0200 Subject: [PATCH 12/21] Make strcpy_limit slightly more sane. --- Core/HLE/sceIo.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index a1aad26ee0..76607a0038 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -1927,10 +1927,10 @@ u32 sceIoDopen(const char *path) { // For some reason strncpy will fill up the entire output buffer. No reason to do that, // so we use this trivial replacement. -static void strcpy_limit(char *dest, const char *src, int count) { +static void strcpy_limit(char *dest, const char *src, int limit) { int i; - for (i = 0; i < count; i++) { - if (!src[i]) // Do the check afterwards, so we don't exit before copying the null terminator. + for (i = 0; i < limit - 1; i++) { + if (!src[i]) break; dest[i] = src[i]; } From 6cfc61665d2b8f95b1c5380f4c9e14ca83bee00c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 14:45:55 -0700 Subject: [PATCH 13/21] Switch to a bitmask instead. --- GPU/GLES/Framebuffer.cpp | 12 +++------- GPU/GLES/GLES_GPU.cpp | 48 ++++++++++----------------------------- GPU/GLES/TextureCache.cpp | 2 +- GPU/GPUState.h | 8 +++---- 4 files changed, 20 insertions(+), 50 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 514bddb5b9..8871eaf57f 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -755,9 +755,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // None found? Create one. if (!vfb) { - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; vfb = new VirtualFramebuffer(); vfb->fbo = 0; vfb->fb_address = fb_address; @@ -861,9 +859,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // Use it as a render target. DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); vfb->usageFlags |= FB_USAGE_RENDERTARGET; - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; @@ -1195,9 +1191,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s glEnable(GL_DITHER); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; nvfb->last_frame_render = gpuStats.numFlips; nvfb->dirtyAfterDisplay = true; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index a5fad0603a..4c1842d646 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -906,9 +906,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_REGION1: case GE_CMD_REGION2: gstate_c.framebufChanged = true; - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; case GE_CMD_CLIPENABLE: @@ -970,9 +968,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_SCISSOR1: case GE_CMD_SCISSOR2: gstate_c.framebufChanged = true; - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; /// @@ -984,9 +980,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_FRAMEBUFWIDTH: case GE_CMD_FRAMEBUFPIXFORMAT: gstate_c.framebufChanged = true; - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; case GE_CMD_TEXADDR0: @@ -1001,9 +995,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; case GE_CMD_TEXBUFWIDTH0: @@ -1017,15 +1009,11 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; case GE_CMD_CLUTFORMAT: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; // This could be used to "dirty" textures with clut. break; @@ -1035,9 +1023,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { break; case GE_CMD_LOADCLUT: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; textureCache_.LoadClut(); // This could be used to "dirty" textures with clut. break; @@ -1080,9 +1066,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { gstate_c.curTextureHeight = gstate.getTextureHeight(0); shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); // We will need to reset the texture now. - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; } break; @@ -1093,9 +1077,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXSIZE5: case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; case GE_CMD_ZBUFPTR: @@ -1216,9 +1198,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_VIEWPORTZ1: case GE_CMD_VIEWPORTZ2: gstate_c.framebufChanged = true; - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; case GE_CMD_LIGHTENABLE0: @@ -1292,9 +1272,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { case GE_CMD_TEXMODE: case GE_CMD_TEXFILTER: case GE_CMD_TEXWRAP: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; ////////////////////////////////////////////////////////////////// @@ -1573,9 +1551,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { #endif case GE_CMD_TEXLEVEL: - if (gstate_c.textureChanged == TEXCHANGE_UNCHANGED) { - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; - } + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; break; ////////////////////////////////////////////////////////////////// diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 839140a99d..280fc628d7 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -923,7 +923,7 @@ void TextureCache::SetTexture(bool force) { bool doDelete = true; // If we only loaded a clut, or changed framebuf or something, we don't need to rehash the texture data. - if (gstate_c.textureChanged == TEXCHANGE_PARAMSONLY) { + if ((gstate_c.textureChanged & TEXCHANGE_UPDATED) == 0) { rehash = false; } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index b2e4d723ce..7933a91c35 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -435,9 +435,9 @@ struct UVScale { }; enum TextureChangeReason { - TEXCHANGE_UNCHANGED, - TEXCHANGE_UPDATED, - TEXCHANGE_PARAMSONLY, + TEXCHANGE_UNCHANGED = 0x00, + TEXCHANGE_UPDATED = 0x01, + TEXCHANGE_PARAMSONLY = 0x02, }; struct GPUStateCache @@ -447,7 +447,7 @@ struct GPUStateCache u32 offsetAddr; - TextureChangeReason textureChanged; + u8 textureChanged; bool textureFullAlpha; bool vertexFullAlpha; bool framebufChanged; From 58038c05fceba8dbfee306c65cc1251cf6b71caa Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 15:16:51 -0700 Subject: [PATCH 14/21] Simplify gstate_c savestate code a bit. So it's not as awkward to modify. --- GPU/GPUState.cpp | 52 +++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 36 deletions(-) diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index a2d095a4db..408c3b7cc4 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -267,24 +267,6 @@ struct GPUStateCache_v0 UVScale uv; bool flipTexture; - - float lightpos[4][3]; - float lightdir[4][3]; - float lightatt[4][3]; - float lightColor[3][4][3]; // Ambient Diffuse Specular - float lightangle[4]; // spotlight cone angle (cosine) - float lightspotCoef[4]; // spotlight dropoff - float morphWeights[8]; - - u32 curTextureWidth; - u32 curTextureHeight; - u32 actualTextureHeight; - - float vpWidth; - float vpHeight; - - u32 curRTWidth; - u32 curRTHeight; }; void GPUStateCache::DoState(PointerWrap &p) { @@ -301,27 +283,25 @@ void GPUStateCache::DoState(PointerWrap &p) { textureFullAlpha = old.textureFullAlpha; vertexFullAlpha = old.vertexFullAlpha; framebufChanged = old.framebufChanged; + skipDrawReason = old.skipDrawReason; + uv = old.uv; + flipTexture = old.flipTexture; + } else { + p.Do(vertexAddr); + p.Do(indexAddr); + p.Do(offsetAddr); - const size_t oldOffset = offsetof(GPUStateCache_v0, skipDrawReason); - const size_t newOffset = offsetof(GPUStateCache, skipDrawReason); - memcpy((char *)this + newOffset, (char *)&old + oldOffset, sizeof(old) - oldOffset); - return; + p.Do(textureChanged); + p.Do(textureFullAlpha); + p.Do(vertexFullAlpha); + p.Do(framebufChanged); + + p.Do(skipDrawReason); + + p.Do(uv); + p.Do(flipTexture); } - p.Do(vertexAddr); - p.Do(indexAddr); - p.Do(offsetAddr); - - p.Do(textureChanged); - p.Do(textureFullAlpha); - p.Do(vertexFullAlpha); - p.Do(framebufChanged); - - p.Do(skipDrawReason); - - p.Do(uv); - p.Do(flipTexture); - p.Do(lightpos); p.Do(lightdir); p.Do(lightatt); From dc0eea05220a58c0c3f6797e1af7b74759db5620 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 16:19:18 -0700 Subject: [PATCH 15/21] Propagate texcache hash fails through cluts. In games that use palette swapping (hi, FF2), if we detect a change in a texture, another texture with the same address but new clut might not be rehashed. This just marks all other textures as dangerous when a hashfail occurs. Fixes FF2 from recent optimizations. It's slower, but still much faster than before. --- GPU/GLES/TextureCache.cpp | 20 ++++++++++++++++++-- GPU/GLES/TextureCache.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 280fc628d7..1f1e90cf76 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -922,8 +922,13 @@ void TextureCache::SetTexture(bool force) { bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE; bool doDelete = true; - // If we only loaded a clut, or changed framebuf or something, we don't need to rehash the texture data. - if ((gstate_c.textureChanged & TEXCHANGE_UPDATED) == 0) { + // First let's see if another texture with the same address had a hashfail. + if (entry->status & TexCacheEntry::STATUS_CLUT_RECHECK) { + // Always rehash in this case, if one changed the rest all probably did. + rehash = true; + entry->status &= ~TexCacheEntry::STATUS_CLUT_RECHECK; + } else if ((gstate_c.textureChanged & TEXCHANGE_UPDATED) == 0) { + // Okay, just some parameter change - the data didn't change, no need to rehash. rehash = false; } @@ -1037,6 +1042,17 @@ void TextureCache::SetTexture(bool force) { if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) { entry->SetHashStatus(TexCacheEntry::STATUS_HASHING); } + + // Also, mark any textures with the same address but different clut. They need rechecking. + if (cluthash != 0) { + const u64 cachekeyMin = (u64)texaddr << 32; + const u64 cachekeyMax = (u64)(texaddr + 1) << 32; + for (auto it = cache.lower_bound(cachekeyMin), end = cache.upper_bound(cachekeyMax); it != end; ++it) { + if (it->second.cluthash != cluthash) { + it->second.status |= TexCacheEntry::STATUS_CLUT_RECHECK; + } + } + } } } else { VERBOSE_LOG(G3D, "No texture in cache, decoding..."); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 0dfa7baf42..43688bd2c9 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -87,6 +87,7 @@ private: STATUS_ALPHA_MASK = 0x0c, STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 15 frames in between.) + STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail. }; // Status, but int so we can zero initialize. From ca6e4635aa32a724296d4804c63a4a530d79ed21 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 16:25:09 -0700 Subject: [PATCH 16/21] Fix texture cache invalidation. --- GPU/GLES/TextureCache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 1f1e90cf76..be9ca20a5a 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -138,8 +138,8 @@ void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) { // They could invalidate inside the texture, let's just give a bit of leeway. const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4; - u64 startKey = addr - LARGEST_TEXTURE_SIZE; - u64 endKey = addr + size + LARGEST_TEXTURE_SIZE; + u64 startKey = (u64)(addr - LARGEST_TEXTURE_SIZE) << 32; + u64 endKey = (u64)(addr + size + LARGEST_TEXTURE_SIZE) << 32; for (TexCache::iterator iter = cache.lower_bound(startKey), end = cache.upper_bound(endKey); iter != end; ++iter) { u32 texAddr = iter->second.addr; u32 texEnd = iter->second.addr + iter->second.sizeInRAM; From f4458edc76715fadefd473514c73c1136eabc5ce Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 21:11:17 -0700 Subject: [PATCH 17/21] Avoid unbinding the current fbo on block transfer. Improves God of War performance by 25% (at least in the demo.) --- GPU/GLES/Framebuffer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 8871eaf57f..8105d237e5 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1674,13 +1674,13 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) { if (!Memory::IsValidAddress(displayFramebufPtr_)) return; - fbo_unbind(); - currentRenderVfb_ = 0; - bool needUnbind = false; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; if (MaskedEqual(vfb->fb_address, addr)) { + fbo_unbind(); + currentRenderVfb_ = 0; + vfb->dirtyAfterDisplay = true; vfb->reallyDirtyAfterDisplay = true; // TODO: This without the fbo_unbind() above would be better than destroying the FBO. From 9982d04f9d945b11adfbfa0ff6a4b68564b2d321 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 21:29:55 -0700 Subject: [PATCH 18/21] Optimize the case of a direct byte copy. Small improvement (like 3.5%) in God of War. --- GPU/GLES/GLES_GPU.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 4c1842d646..8edebcfbd9 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -1698,14 +1698,22 @@ void GLES_GPU::DoBlockTransfer() { // Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?) // Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them // entirely by walking a couple of pointers... - // GetPointerUnchecked crash in windows 64 bit of issue 2301 - for (int y = 0; y < height; y++) { - u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp; - u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp; - + if (srcStride == dstStride && width == srcStride) { + // Common case in God of War, let's do it all in one chunk. + u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp; + u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp; const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr); u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr); - memcpy(dst, src, width * bpp); + memcpy(dst, src, width * height * bpp); + } else { + for (int y = 0; y < height; y++) { + u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp; + u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp; + + const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr); + u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr); + memcpy(dst, src, width * bpp); + } } // TODO: Notify all overlapping FBOs that they need to reload. From ed99c33cd72213b29b5d027980cd1e2ff148ad9f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 21:47:11 -0700 Subject: [PATCH 19/21] Interpret iForceMaxEmulatedFPS based on 1.001 NTSC. --- Core/HLE/sceDisplay.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index d4a0bdf3f8..12c3741e0e 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -672,7 +672,8 @@ u32 sceDisplaySetFramebuf(u32 topaddr, int linesize, int pixelformat, int sync) const int FLIP_DELAY_MIN_FLIPS = 30; u64 now = CoreTiming::GetTicks(); - u64 expected = msToCycles(1000) / g_Config.iForceMaxEmulatedFPS; + // 1001 to account for NTSC timing (59.94 fps.) + u64 expected = msToCycles(1001) / g_Config.iForceMaxEmulatedFPS; u64 actual = now - lastFlipCycles; if (actual < expected - FLIP_DELAY_CYCLES_MIN) { if (lastFlipsTooFrequent >= FLIP_DELAY_MIN_FLIPS) { From 943353faac087828e04ea57071da16f2d8c990c2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Apr 2014 18:34:40 -0700 Subject: [PATCH 20/21] Use GL_BGRA for 32 bit textures on Windows. 25% improvement in Popolocrois, probably decent gains in other games using 32-bit textures often. FF2 for example doesn't. --- GPU/GLES/TextureCache.cpp | 66 +++++++++++++++++++++++++++++++++------ native | 2 +- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index be9ca20a5a..e96a978e78 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -56,6 +56,14 @@ extern int g_iNumVideos; +static inline bool UseBGRA8888() { + // TODO: Other platforms? May depend on vendor which is faster? +#ifdef _WIN32 + return gl_extensions.EXT_bgra; +#endif + return false; +} + TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) { lastBoundTexture = -1; decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL; @@ -633,7 +641,35 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n } break; default: - { + if (UseBGRA8888()) { +#ifdef _M_SSE + const __m128i maskGA = _mm_set1_epi32(0xFF00FF00); + + __m128i *srcp = (__m128i *)src; + __m128i *dstp = (__m128i *)dst; + const int sseChunks = numPixels / 4; + for (int i = 0; i < sseChunks; ++i) { + __m128i c = _mm_load_si128(&srcp[i]); + __m128i rb = _mm_andnot_si128(maskGA, c); + c = _mm_and_si128(c, maskGA); + + __m128i b = _mm_srli_epi32(rb, 16); + __m128i r = _mm_slli_epi32(rb, 16); + c = _mm_or_si128(_mm_or_si128(c, r), b); + _mm_store_si128(&dstp[i], c); + } + // The remainder starts right after those done via SSE. + int i = sseChunks * 4; +#else + int i = 0; +#endif + for (; i < numPixels; i++) { + u32 c = src[i]; + dst[i] = ((c >> 16) & 0x000000FF) | + ((c >> 0) & 0xFF00FF00) | + ((c << 16) & 0x00FF0000); + } + } else { // No need to convert RGBA8888, right order already if (dst != src) memcpy(dst, src, numPixels * sizeof(u32)); @@ -735,7 +771,7 @@ void TextureCache::UpdateCurrentClut() { clutHash_ = DoReliableHash((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888); // Avoid a copy when we don't need to convert colors. - if (clutFormat != GE_CMODE_32BIT_ABGR8888) { + if (UseBGRA8888() || clutFormat != GE_CMODE_32BIT_ABGR8888) { ConvertColors(clutBufConverted_, clutBufRaw_, getClutDestFormat(clutFormat), clutExtendedBytes / sizeof(u16)); clutBuf_ = clutBufConverted_; } else { @@ -1381,18 +1417,23 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c if (!gstate.isTextureSwizzled()) { // Special case: if we don't need to deal with packing, we don't need to copy. if ((g_Config.iTexScalingLevel == 1 && gl_extensions.EXT_unpack_subimage) || w == bufw) { - finalBuf = (void *)texptr; + if (UseBGRA8888()) { + finalBuf = tmpTexBuf32.data(); + ConvertColors(finalBuf, texptr, dstFmt, bufw * h); + } else { + finalBuf = (void *)texptr; + } } else { - int len = bufw * h; tmpTexBuf32.resize(std::max(bufw, w) * h); tmpTexBufRearrange.resize(std::max(bufw, w) * h); - memcpy(tmpTexBuf32.data(), texptr, len * sizeof(u32)); finalBuf = tmpTexBuf32.data(); + ConvertColors(finalBuf, texptr, dstFmt, bufw * h); } } else { tmpTexBuf32.resize(std::max(bufw, w) * h); finalBuf = UnswizzleFromMem(texptr, bufw, 4, level); + ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h); } break; @@ -1567,8 +1608,6 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac // TODO: only do this once u32 texByteAlign = 1; - // TODO: Look into using BGRA for 32-bit textures when the GL_EXT_texture_format_BGRA8888 extension is available, as it's faster than RGBA on some chips. - GEPaletteFormat clutformat = gstate.getClutPaletteFormat(); int bufw; void *finalBuf = DecodeTextureLevel(GETextureFormat(entry.format), clutformat, level, texByteAlign, dstFmt, &bufw); @@ -1601,16 +1640,23 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; + GLuint components2 = components; +#if defined(MAY_HAVE_GLES3) + if (UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE) { + components2 = GL_BGRA_EXT; + } +#endif + if (replaceImages) { - glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, w, h, components, dstFmt, pixelData); + glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, w, h, components2, dstFmt, pixelData); } else { - glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components, dstFmt, pixelData); + glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components2, dstFmt, pixelData); GLenum err = glGetError(); if (err == GL_OUT_OF_MEMORY) { lowMemoryMode_ = true; Decimate(); // Try again. - glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components, dstFmt, pixelData); + glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components2, dstFmt, pixelData); } } diff --git a/native b/native index 3c13a90099..980e6eaca6 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 3c13a9009999bf883c4c608d8b4f30a7d0c75560 +Subproject commit 980e6eaca6e51e07c0ea5b38bce71499f10acc21 From 419e812c9a4ccce9c02cb69619fa6ab858c77b03 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 14 Apr 2014 01:27:47 -0700 Subject: [PATCH 21/21] Also color swap DXT textures. Probably #5878. --- GPU/GLES/TextureCache.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index e96a978e78..6c44755bd6 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -1453,6 +1453,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c } } finalBuf = tmpTexBuf32.data(); + ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h); w = (w + 3) & ~3; } break; @@ -1474,6 +1475,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c } w = (w + 3) & ~3; finalBuf = tmpTexBuf32.data(); + ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h); } break; @@ -1494,6 +1496,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c } w = (w + 3) & ~3; finalBuf = tmpTexBuf32.data(); + ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h); } break;