From b308aaad0337e633dcc05d5746d5aa6a3049c460 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 31 May 2014 13:07:19 -0700 Subject: [PATCH 1/6] Update disabled IsReallyAClear() code. Not sure if there's still something wrong about depth. --- GPU/GLES/Framebuffer.cpp | 2 ++ GPU/GLES/SoftwareTransform.cpp | 44 +++++++++++++++++----------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 16d13e3d2a..a5e69f7e7c 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -528,6 +528,7 @@ void FramebufferManager::DrawPlainColor(u32 color) { glEnableVertexAttribArray(program->a_position); glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices); + // TODO: Really disable this? glDisableVertexAttribArray(program->a_position); glsl_unbind(); @@ -602,6 +603,7 @@ void FramebufferManager::DrawActiveTexture(GLuint texture, float x, float y, flo glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); glVertexAttribPointer(program->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, texCoords); glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_SHORT, indices); + // TODO: Really disable these? glDisableVertexAttribArray(program->a_position); glDisableVertexAttribArray(program->a_texcoord0); diff --git a/GPU/GLES/SoftwareTransform.cpp b/GPU/GLES/SoftwareTransform.cpp index 62ab0e36ad..3a3409c0b3 100644 --- a/GPU/GLES/SoftwareTransform.cpp +++ b/GPU/GLES/SoftwareTransform.cpp @@ -23,6 +23,7 @@ #include "GPU/Math3D.h" #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/TransformCommon.h" +#include "GPU/GLES/Framebuffer.h" #include "GPU/GLES/ShaderManager.h" #include "GPU/GLES/TransformPipeline.h" @@ -88,8 +89,7 @@ bool TransformDrawEngine::IsReallyAClear(int numVerts) const { if (transformed[0].x != 0.0f || transformed[0].y != 0.0f) return false; - u32 matchcolor; - memcpy(&matchcolor, transformed[0].color0, 4); + u32 matchcolor = transformed[0].color0_32; float matchz = transformed[0].z; int bufW = gstate_c.curRTWidth; @@ -97,9 +97,7 @@ bool TransformDrawEngine::IsReallyAClear(int numVerts) const { float prevX = 0.0f; for (int i = 1; i < numVerts; i++) { - u32 vcolor; - memcpy(&vcolor, transformed[i].color0, 4); - if (vcolor != matchcolor || transformed[i].z != matchz) + if (transformed[i].color0_32 != matchcolor || transformed[i].z != matchz) return false; if ((i & 1) == 0) { @@ -386,8 +384,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( // An alternative option is to simply ditch all the verts except the first and last to create a single // rectangle out of many. Quite a small optimization though. if (false && maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(maxIndex)) { - u32 clearColor; - memcpy(&clearColor, transformed[0].color0, 4); + u32 clearColor = transformed[0].color0_32; float clearDepth = transformed[0].z; const float col[4] = { ((clearColor & 0xFF)) / 255.0f, @@ -398,22 +395,17 @@ void TransformDrawEngine::SoftwareTransformAndDraw( bool colorMask = gstate.isClearModeColorMask(); bool alphaMask = gstate.isClearModeAlphaMask(); - glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask); - if (alphaMask) { - glstate.stencilTest.set(true); - // Clear stencil - // TODO: extract the stencilValue properly, see below - int stencilValue = 0; - glstate.stencilFunc.set(GL_ALWAYS, stencilValue, 255); - } else { - // Don't touch stencil - glstate.stencilTest.set(false); - } - glstate.scissorTest.set(false); bool depthMask = gstate.isClearModeDepthMask(); + if (depthMask) { + framebufferManager_->SetDepthUpdated(); + } - int target = 0; - if (colorMask || alphaMask) target |= GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + glstate.stencilTest.set(false); + glstate.scissorTest.set(false); + + GLbitfield target = 0; + if (colorMask || alphaMask) target |= GL_COLOR_BUFFER_BIT; + if (alphaMask) target |= GL_STENCIL_BUFFER_BIT; if (depthMask) target |= GL_DEPTH_BUFFER_BIT; glClearColor(col[0], col[1], col[2], col[3]); @@ -422,8 +414,16 @@ void TransformDrawEngine::SoftwareTransformAndDraw( #else glClearDepth(clearDepth); #endif - glClearStencil(0); // TODO - take from alpha? + // Stencil takes alpha. + glClearStencil(clearColor >> 24); glClear(target); + + // TODO: Now we may have enabled vertex arrays with no data. + // This will crash later in DrawActiveTexture(). So we just give it dummy values. + if (program->attrMask & (1 << ATTR_COLOR0)) { + glBindBuffer(GL_ARRAY_BUFFER, 0); + glVertexAttribPointer(ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, 4, transformed); + } return; } From bd67cdab3f9661248117b5a8f2b9da2b8de2bed9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 31 May 2014 13:08:17 -0700 Subject: [PATCH 2/6] Add basic decimation to the depal clut cache. --- GPU/GLES/DepalettizeShader.cpp | 16 ++++++++++++++-- GPU/GLES/DepalettizeShader.h | 1 + GPU/GLES/GLES_GPU.cpp | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 21a2f876a8..45ad860a3a 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -24,6 +24,8 @@ #include "GPU/GPUState.h" #include "GPU/GLES/TextureCache.h" +static const int DEPAL_TEXTURE_OLD_AGE = 120; + #ifdef _WIN32 #define SHADERLOG #endif @@ -95,7 +97,7 @@ DepalShaderCache::DepalShaderCache() { glShaderSource(vertexShader_, 1, useGL3_ ? &depalVShader300 : &depalVShader100, 0); glCompileShader(vertexShader_); - if (CheckShaderCompileSuccess(vertexShader_, depalVShader100)) { + if (!CheckShaderCompileSuccess(vertexShader_, useGL3_ ? depalVShader300 : depalVShader100)) { // ... } } @@ -322,6 +324,7 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { auto oldtex = texCache_.find(realClutID); if (oldtex != texCache_.end()) { + oldtex->second->lastFrame = gpuStats.numFlips; return oldtex->second->texture; } @@ -348,6 +351,7 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + tex->lastFrame = gpuStats.numFlips; texCache_[realClutID] = tex; return tex->texture; } @@ -367,7 +371,15 @@ void DepalShaderCache::Clear() { } void DepalShaderCache::Decimate() { - // TODO + for (auto tex = texCache_.begin(); tex != texCache_.end(); ) { + if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) { + glDeleteTextures(1, &tex->second->texture); + delete tex->second; + texCache_.erase(tex++); + } else { + ++tex; + } + } } GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { diff --git a/GPU/GLES/DepalettizeShader.h b/GPU/GLES/DepalettizeShader.h index 79793a755d..34cbc01447 100644 --- a/GPU/GLES/DepalettizeShader.h +++ b/GPU/GLES/DepalettizeShader.h @@ -30,6 +30,7 @@ public: class DepalTexture { public: GLuint texture; + int lastFrame; }; // Caches both shaders and palette textures. diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 900821d374..18b7451429 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -545,6 +545,7 @@ void GLES_GPU::BeginFrameInternal() { textureCache_.StartFrame(); transformDraw_.DecimateTrackedVertexArrays(); + depalShaderCache_.Decimate(); if (dumpNextFrame_) { NOTICE_LOG(G3D, "DUMPING THIS FRAME"); From e109a547aeb82d4283e142a943107aed876c3b61 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 31 May 2014 18:43:35 -0700 Subject: [PATCH 3/6] Disable vertex arrays not in use. --- GPU/GLES/Framebuffer.cpp | 8 ++++---- GPU/GLES/SoftwareTransform.cpp | 7 ------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index a5e69f7e7c..a27fa87484 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -521,6 +521,8 @@ void FramebufferManager::DrawPlainColor(u32 color) { ((color & 0xFF000000) >> 24) / 255.0f, }; + shaderManager_->DirtyLastShader(); + glsl_bind(program); glUniform4fv(plainColorLoc_, 1, col); glBindBuffer(GL_ARRAY_BUFFER, 0); @@ -528,7 +530,6 @@ void FramebufferManager::DrawPlainColor(u32 color) { glEnableVertexAttribArray(program->a_position); glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices); - // TODO: Really disable this? glDisableVertexAttribArray(program->a_position); glsl_unbind(); @@ -589,6 +590,8 @@ void FramebufferManager::DrawActiveTexture(GLuint texture, float x, float y, flo glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + shaderManager_->DirtyLastShader(); // dirty lastShader_ + glsl_bind(program); if (program == postShaderProgram_ && timeLoc_ != -1) { int flipCount = __DisplayGetFlipCount(); @@ -603,13 +606,10 @@ void FramebufferManager::DrawActiveTexture(GLuint texture, float x, float y, flo glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); glVertexAttribPointer(program->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, texCoords); glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_SHORT, indices); - // TODO: Really disable these? glDisableVertexAttribArray(program->a_position); glDisableVertexAttribArray(program->a_texcoord0); glsl_unbind(); - - shaderManager_->DirtyLastShader(); // dirty lastShader_ } diff --git a/GPU/GLES/SoftwareTransform.cpp b/GPU/GLES/SoftwareTransform.cpp index 3a3409c0b3..cbc6c1be3c 100644 --- a/GPU/GLES/SoftwareTransform.cpp +++ b/GPU/GLES/SoftwareTransform.cpp @@ -417,13 +417,6 @@ void TransformDrawEngine::SoftwareTransformAndDraw( // Stencil takes alpha. glClearStencil(clearColor >> 24); glClear(target); - - // TODO: Now we may have enabled vertex arrays with no data. - // This will crash later in DrawActiveTexture(). So we just give it dummy values. - if (program->attrMask & (1 << ATTR_COLOR0)) { - glBindBuffer(GL_ARRAY_BUFFER, 0); - glVertexAttribPointer(ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, 4, transformed); - } return; } From 580143e5e264f1295a60adef2d5836eaf967bf62 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 31 May 2014 18:55:00 -0700 Subject: [PATCH 4/6] Disable vertex arrays before depal as well. --- GPU/GLES/GLES_GPU.cpp | 1 + GPU/GLES/ShaderManager.cpp | 3 +-- GPU/GLES/TextureCache.cpp | 21 ++++++++++++--------- GPU/GLES/TextureCache.h | 5 +++++ GPU/GPUState.h | 2 -- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 18b7451429..3ccd375063 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -411,6 +411,7 @@ GLES_GPU::GLES_GPU() framebufferManager_.SetShaderManager(shaderManager_); textureCache_.SetFramebufferManager(&framebufferManager_); textureCache_.SetDepalShaderCache(&depalShaderCache_); + textureCache_.SetShaderManager(shaderManager_); // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 3d1ecf7db8..8872960182 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -684,11 +684,10 @@ Shader *ShaderManager::ApplyVertexShader(int prim, u32 vertType) { LinkedShader *ShaderManager::ApplyFragmentShader(Shader *vs, int prim, u32 vertType) { FragmentShaderID FSID; ComputeFragmentShaderID(&FSID); - if (lastVShaderSame_ && FSID == lastFSID_ && !gstate_c.shaderChanged) { + if (lastVShaderSame_ && FSID == lastFSID_) { lastShader_->UpdateUniforms(vertType); return lastShader_; } - gstate_c.shaderChanged = false; lastFSID_ = FSID; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 958be67070..3345dce644 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -28,6 +28,7 @@ #include "GPU/GLES/Framebuffer.h" #include "GPU/GLES/FragmentShaderGenerator.h" #include "GPU/GLES/DepalettizeShader.h" +#include "GPU/GLES/ShaderManager.h" #include "GPU/Common/TextureDecoder.h" #include "Core/Config.h" #include "Core/Host.h" @@ -916,13 +917,17 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { }; static const GLubyte indices[4] = { 0, 1, 3, 2 }; + shaderManager_->DirtyLastShader(); + glUseProgram(program); - gstate_c.shaderChanged = true; + + GLint a_position = glGetAttribLocation(program, "a_position"); + GLint a_texcoord0 = glGetAttribLocation(program, "a_texcoord0"); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - glEnableVertexAttribArray(0); - glEnableVertexAttribArray(1); + glEnableVertexAttribArray(a_position); + glEnableVertexAttribArray(a_texcoord0); glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, clutTexture); @@ -944,14 +949,12 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { #endif glViewport(0, 0, entry->framebuffer->renderWidth, entry->framebuffer->renderHeight); - glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 12, pos); - glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 8, uv); + glVertexAttribPointer(a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); + glVertexAttribPointer(a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, uv); glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices); + glDisableVertexAttribArray(a_position); + glDisableVertexAttribArray(a_texcoord0); - /* - glDisableVertexAttribArray(0); - glDisableVertexAttribArray(1); - */ fbo_bind_color_as_texture(entry->depalFBO, 0); glstate.Restore(); framebufferManager_->RebindFramebuffer(); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 8ee87e5d6d..4b2bcfd3c7 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -28,6 +28,7 @@ struct VirtualFramebuffer; class FramebufferManager; class DepalShaderCache; +class ShaderManager; enum TextureFiltering { AUTO = 1, @@ -74,6 +75,9 @@ public: void SetDepalShaderCache(DepalShaderCache *dpCache) { depalShaderCache_ = dpCache; } + void SetShaderManager(ShaderManager *sm) { + shaderManager_ = sm; + } size_t NumLoadedTextures() const { return cache.size(); @@ -203,6 +207,7 @@ private: int decimationCounter_; FramebufferManager *framebufferManager_; DepalShaderCache *depalShaderCache_; + ShaderManager *shaderManager_; }; GLenum getClutDestFormat(GEPaletteFormat format); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index ca8b668ca1..b8e79488e7 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -453,8 +453,6 @@ struct GPUStateCache bool textureSimpleAlpha; bool vertexFullAlpha; bool framebufChanged; - // Doesn't need savestating. - bool shaderChanged; int skipDrawReason; From 097ab5395280180e662b100d2d29df2e3f25481b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 31 May 2014 18:55:35 -0700 Subject: [PATCH 5/6] Enable the glClear() optimization. It seems to help and so far have not noticed any problems. --- GPU/GLES/SoftwareTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/SoftwareTransform.cpp b/GPU/GLES/SoftwareTransform.cpp index cbc6c1be3c..fa1f00ea14 100644 --- a/GPU/GLES/SoftwareTransform.cpp +++ b/GPU/GLES/SoftwareTransform.cpp @@ -383,7 +383,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( // // An alternative option is to simply ditch all the verts except the first and last to create a single // rectangle out of many. Quite a small optimization though. - if (false && maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(maxIndex)) { + if (maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(maxIndex)) { u32 clearColor = transformed[0].color0_32; float clearDepth = transformed[0].z; const float col[4] = { From 4461c7e3c4275930f1c4b03e0e5cbb1d019cc83b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 31 May 2014 21:12:50 -0700 Subject: [PATCH 6/6] Oops, remove an old wrong comment. --- GPU/GLES/FragmentShaderGenerator.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index b4a52583d8..220e6e1f2f 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -202,7 +202,6 @@ StencilValueType ReplaceAlphaWithStencilType() { return STENCIL_VALUE_KEEP; } - // Decrementing always zeros, since there's only one bit. case GE_STENCILOP_DECR: case GE_STENCILOP_INCR: case GE_STENCILOP_INVERT: