From 8fba7fa98eeead157fb627e0d92b4b76a5580a55 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 29 Mar 2014 21:58:38 +0100 Subject: [PATCH 01/24] Initial work on depalettization. --- GPU/GLES/DepalettizeShader.cpp | 261 +++++++++++++++++++++++++++++++++ GPU/GLES/DepalettizeShader.h | 54 +++++++ GPU/GLES/Framebuffer.h | 1 + GPU/GLES/GLES_GPU.cpp | 1 + GPU/GLES/GLES_GPU.h | 2 + GPU/GLES/TextureCache.cpp | 83 +++++++++-- GPU/GLES/TextureCache.h | 10 ++ GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 8 +- 9 files changed, 405 insertions(+), 17 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index e69de29bb2..70b7dfb300 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -0,0 +1,261 @@ +// Copyright (c) 2014- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "Common/Log.h" +#include "DepalettizeShader.h" +#include "GPU/GPUState.h" +#include "GPU/GLES/TextureCache.h" + +static const char *depalVShader = +"#version 100\n" +"// Depal shader\n" +"attribute vec4 a_position;\n" +"attribute vec2 a_texcoord0;\n" +"varying vec2 v_texcoord0;\n" +"void main() {\n" +" v_texcoord0 = a_texcoord0;\n" +" gl_Position = a_position;\n" +"}\n"; + + +static bool CheckShaderCompileSuccess(GLuint shader, const char *code) { + GLint success; + glGetShaderiv(shader, GL_COMPILE_STATUS, &success); + if (!success) { +#define MAX_INFO_LOG_SIZE 2048 + GLchar infoLog[MAX_INFO_LOG_SIZE]; + GLsizei len; + glGetShaderInfoLog(shader, MAX_INFO_LOG_SIZE, &len, infoLog); + infoLog[len] = '\0'; +#ifdef ANDROID + ELOG("Error in shader compilation! %s\n", infoLog); + ELOG("Shader source:\n%s\n", (const char *)code); +#endif + ERROR_LOG(G3D, "Error in shader compilation!\n"); + ERROR_LOG(G3D, "Info log: %s\n", infoLog); + ERROR_LOG(G3D, "Shader source:\n%s\n", (const char *)code); +#ifdef SHADERLOG + OutputDebugStringUTF8(infoLog); +#endif + shader = 0; + return false; + } else { + DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code); + return true; + } +} + +DepalShaderCache::DepalShaderCache() { + // Pre-build the vertex program + vertexShader_ = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vertexShader_, 1, &depalVShader, 0); + glCompileShader(vertexShader_); + + if (CheckShaderCompileSuccess(vertexShader_, depalVShader)) { + // ... + } +} + +DepalShaderCache::~DepalShaderCache() { + Clear(); + glDeleteShader(vertexShader_); +} + +void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { + char *p = buffer; +#define WRITE p+=sprintf + + WRITE(p, "#version 100\n"); + WRITE(p, "varying vec2 texcoord0;\n"); + WRITE(p, "uniform sampler2D tex;\n"); + WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "void main() {\n"); + WRITE(p, " vec4 index = texture2D(tex);\n"); + + char lookupMethod[128] = "index.r"; + char offset[128] = ""; + + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + const u32 clutBase = gstate.getClutIndexStartPos(); + + int shift = gstate.getClutIndexShift(); + int mask = gstate.getClutIndexMask(); + + // pixelformat is the format of the texture we are sampling. + switch (pixelFormat) { + case GE_FORMAT_8888: + if (mask == 0xFF) { + switch (shift) { // bgra? + case 0: strcpy(lookupMethod, "index.r"); break; + case 8: strcpy(lookupMethod, "index.g"); break; + case 16: strcpy(lookupMethod, "index.b"); break; + default: + case 24: strcpy(lookupMethod, "index.a"); break; + } + } else { + // Ugh + } + break; + case GE_FORMAT_4444: + if ((mask & 0xF) == 0xF) { + switch (shift) { // bgra? + case 0: strcpy(lookupMethod, "index.r"); break; + case 4: strcpy(lookupMethod, "index.g"); break; + case 8: strcpy(lookupMethod, "index.b"); break; + default: + case 12: strcpy(lookupMethod, "index.a"); break; + } + } else { + // Ugh + } + break; + case GE_FORMAT_565: + if ((mask & 0x3f) == 0x3F) { + switch (shift) { // bgra? + case 0: strcpy(lookupMethod, "index.r"); break; + case 5: strcpy(lookupMethod, "index.g"); break; + default: + case 11: strcpy(lookupMethod, "index.b"); break; + } + } else { + // Ugh + } + break; + case GE_FORMAT_5551: + if ((mask & 0x1F) == 0x1F) { + switch (shift) { // bgra? + case 0: strcpy(lookupMethod, "index.r"); break; + case 4: strcpy(lookupMethod, "index.g"); break; + case 8: strcpy(lookupMethod, "index.b"); break; + default: + case 15: strcpy(lookupMethod, "index.a"); break; + } + } else { + // Ugh + } + break; + } + + if (clutBase != 0) { + sprintf(offset, " + %.0f", (float)clutBase / 255.0f); // 256? + } + + WRITE(p, " vec4 color = texture2D(pal, vec2(%s%s, 0.0));\n", lookupMethod, offset); + WRITE(p, " gl_Color = color;\n"); + WRITE(p, "}\n"); +} + +u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) { + return (gstate.clutformat & 0xFFFFFF) | (pixelFormat << 24); +} + +GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { + auto oldtex = texCache_.find(clutID); + if (oldtex != texCache_.end()) { + return oldtex->second->texture; + } + + GLuint dstFmt = getClutDestFormat(gstate.getClutPaletteFormat()); + + DepalTexture *tex = new DepalTexture(); + glGenTextures(1, &tex->texture); + glActiveTexture(1); + glBindTexture(GL_TEXTURE_2D, tex->texture); + GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; + glTexImage2D(GL_TEXTURE_2D, 0, components, 256, 1, 0, components, dstFmt, (void *)rawClut); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glActiveTexture(0); + + texCache_[clutID] = tex; + return tex->texture; +} + +void DepalShaderCache::Clear() { + for (auto shader : cache_) { + glDeleteShader(shader.second->fragShader); + glDeleteProgram(shader.second->program); + delete shader.second; + } + for (auto tex : texCache_) { + glDeleteTextures(1, &tex.second->texture); + delete tex.second; + } +} + +void DepalShaderCache::Decimate() { + // TODO +} + +GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { + u32 id = GenerateShaderID(pixelFormat); + + auto shader = cache_.find(id); + if (shader != cache_.end()) { + return shader->second->program; + } + + char buffer[2048]; + + GenerateDepalShader(buffer, pixelFormat); + + GLuint fragShader = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vertexShader_, 1, &depalVShader, 0); + glCompileShader(vertexShader_); + + GLuint program = glCreateProgram(); + glAttachShader(program, vertexShader_); + glAttachShader(program, fragShader); + + glBindAttribLocation(program, 0, "a_position"); + glBindAttribLocation(program, 1, "a_texcoord0"); + + glLinkProgram(program); + glUseProgram(program); + + GLint u_tex = glGetUniformLocation(program, "tex"); + GLint u_pal = glGetUniformLocation(program, "pal"); + + glUniform1d(u_tex, 0); + glUniform1d(u_pal, 1); + + GLint linkStatus = GL_FALSE; + glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); + if (linkStatus != GL_TRUE) { + GLint bufLength = 0; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength); + if (bufLength) { + char* buf = new char[bufLength]; + glGetProgramInfoLog(program, bufLength, NULL, buf); + ERROR_LOG(G3D, "Could not link program:\n %s", buf); + delete[] buf; // we're dead! + } + return 0; + } + + DepalShader *depal = new DepalShader(); + depal->program = program; + depal->fragShader = fragShader; + + cache_[id] = depal; + + return depal->program; +} diff --git a/GPU/GLES/DepalettizeShader.h b/GPU/GLES/DepalettizeShader.h index e69de29bb2..f5861181b5 100644 --- a/GPU/GLES/DepalettizeShader.h +++ b/GPU/GLES/DepalettizeShader.h @@ -0,0 +1,54 @@ +// Copyright (c) 2014- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "Common/CommonTypes.h" +#include "gfx_es2/gl_state.h" +#include "GPU/ge_constants.h" + +class DepalShader { +public: + GLuint program; + GLuint fragShader; +}; + +class DepalTexture { +public: + GLuint texture; +}; + +// Caches both shaders and palette textures. +class DepalShaderCache { +public: + DepalShaderCache(); + ~DepalShaderCache(); + + // This also uploads the palette and binds the correct texture. + GLuint GetDepalettizeShader(GEBufferFormat pixelFormat); + GLuint GetClutTexture(const u32 clutHash, u32 *rawClut); + void Clear(); + void Decimate(); + +private: + u32 GenerateShaderID(GEBufferFormat pixelFormat); + + GLuint vertexShader_; + std::map cache_; + std::map texCache_; +}; + diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index e65b4b6804..3f874e8d0d 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -86,6 +86,7 @@ struct VirtualFramebuffer { GEBufferFormat format; // virtual, right now they are all RGBA8888 FBOColorDepth colorDepth; FBO *fbo; + FBO *depalFBO; bool dirtyAfterDisplay; bool reallyDirtyAfterDisplay; // takes frame skipping into account diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 160257bba1..5c16584452 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -410,6 +410,7 @@ GLES_GPU::GLES_GPU() framebufferManager_.SetTextureCache(&textureCache_); framebufferManager_.SetShaderManager(shaderManager_); textureCache_.SetFramebufferManager(&framebufferManager_); + textureCache_.SetDepalShaderCache(&depalShaderCache_); // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { diff --git a/GPU/GLES/GLES_GPU.h b/GPU/GLES/GLES_GPU.h index 5a57540186..22b59d8208 100644 --- a/GPU/GLES/GLES_GPU.h +++ b/GPU/GLES/GLES_GPU.h @@ -26,6 +26,7 @@ #include "GPU/GLES/Framebuffer.h" #include "GPU/GLES/TransformPipeline.h" #include "GPU/GLES/TextureCache.h" +#include "GPU/GLES/DepalettizeShader.h" class ShaderManager; class LinkedShader; @@ -162,6 +163,7 @@ private: FramebufferManager framebufferManager_; TextureCache textureCache_; + DepalShaderCache depalShaderCache_; TransformDrawEngine transformDraw_; ShaderManager *shaderManager_; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 54a3c7a0e3..6be2a546e6 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -27,6 +27,7 @@ #include "GPU/GLES/TextureCache.h" #include "GPU/GLES/Framebuffer.h" #include "GPU/GLES/FragmentShaderGenerator.h" +#include "GPU/GLES/DepalettizeShader.h" #include "GPU/Common/TextureDecoder.h" #include "Core/Config.h" #include "Core/Host.h" @@ -117,6 +118,9 @@ void TextureCache::Decimate() { for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) { if (iter->second.lastFrame + killAge < gpuStats.numFlips) { glDeleteTextures(1, &iter->second.texture); + if (iter->second.depalFBO) { + fbo_destroy(iter->second.depalFBO); + } cache.erase(iter++); } else { ++iter; @@ -128,6 +132,9 @@ void TextureCache::Decimate() { // In low memory mode, we kill them all. if (lowMemoryMode_ || iter->second.lastFrame + TEXTURE_SECOND_KILL_AGE < gpuStats.numFlips) { glDeleteTextures(1, &iter->second.texture); + if (iter->second.depalFBO) { + fbo_destroy(iter->second.depalFBO); + } secondCache.erase(iter++); } else { ++iter; @@ -214,7 +221,18 @@ inline void AttachFramebufferInvalid(T &entry, VirtualFramebuffer *framebuffer) } } -inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) { +bool TextureCache::AttachFramebufferCLUT(TextureCache::TexCacheEntry *entry, VirtualFramebuffer *framebuffer, u32 address) { + GLuint program = depalShaderCache_->GetDepalettizeShader(framebuffer->format); + if (program) { + entry->framebuffer = framebuffer; + entry->invalidHint = -1; + entry->status |= TexCacheEntry::STATUS_DEPALETTIZE; + return true; + } + return false; +} + +void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) { // If they match exactly, it's non-CLUT and from the top left. if (exactMatch) { // Apply to non-buffered and buffered mode only. @@ -237,22 +255,31 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) return; - // 3rd Birthday (and possibly other games) render to a 16 bit clut texture. - const bool compatFormat = framebuffer->format == entry->format - || (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) - || (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); + // Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture. + // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. + bool clutSuccess = false; + if (((framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16))) { + clutSuccess = AttachFramebufferCLUT(entry, framebuffer, address); + } - // Is it at least the right stride? - if (framebuffer->fb_stride == entry->bufw && compatFormat) { - if (framebuffer->format != entry->format) { - WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); - // TODO: Use an FBO to translate the palette? - AttachFramebufferValid(entry, framebuffer); - } else if ((entry->addr - address) / entry->bufw < framebuffer->height) { - WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address); - // TODO: Keep track of the y offset. - // If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect. - AttachFramebufferInvalid(entry, framebuffer); + if (!clutSuccess) { + // This is either normal or we failed to generate a shader to depalettize + const bool compatFormat = framebuffer->format == entry->format || + (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || + (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); + + // Is it at least the right stride? + if (framebuffer->fb_stride == entry->bufw && compatFormat) { + if (framebuffer->format != entry->format) { + WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); + // TODO: Use an FBO to translate the palette? + AttachFramebufferValid(entry, framebuffer); + } else if ((entry->addr - address) / entry->bufw < framebuffer->height) { + WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address); + // TODO: Keep track of the y offset. + // If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect. + AttachFramebufferInvalid(entry, framebuffer); + } } } } @@ -723,6 +750,7 @@ static inline u32 MiniHash(const u32 *ptr) { return ptr[0]; } +// TODO: Unused, remove? static inline u32 QuickClutHash(const u8 *clut, u32 bytes) { // CLUTs always come in multiples of 32 bytes, can't load them any other way. _dbg_assert_msg_(G3D, (bytes & 31) == 0, "CLUT should always have a multiple of 32 bytes."); @@ -889,6 +917,28 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { if (useBufferedRendering) { framebufferManager_->BindFramebufferColor(entry->framebuffer); + if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) { + GLuint program = depalShaderCache_->GetDepalettizeShader(entry->framebuffer->format); + glUseProgram(program); + + // Check if we can handle the current setup + + GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBufConverted_); + glActiveTexture(1); + glBindTexture(GL_TEXTURE_2D, clutTexture); + glActiveTexture(0); + + if (!entry->depalFBO) { + entry->depalFBO = fbo_create(entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight, 1, false, FBO_8888); + } + fbo_bind_as_render_target(entry->depalFBO); + + // ... + + fbo_bind_color_as_texture(entry->depalFBO, 0); + } + + // Keep the framebuffer alive. entry->framebuffer->last_frame_used = gpuStats.numFlips; @@ -1149,6 +1199,7 @@ void TextureCache::SetTexture(bool force) { entry->framebuffer = 0; entry->maxLevel = maxLevel; entry->lodBias = 0.0f; + entry->depalFBO = 0; entry->dim = gstate.getTextureDimension(0); entry->bufw = bufw; diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 10ccc857dc..69c7355259 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -25,6 +25,7 @@ struct VirtualFramebuffer; class FramebufferManager; +class DepalShaderCache; enum TextureFiltering { AUTO = 1, @@ -60,6 +61,9 @@ public: void SetFramebufferManager(FramebufferManager *fbManager) { framebufferManager_ = fbManager; } + void SetDepalShaderCache(DepalShaderCache *dpCache) { + depalShaderCache_ = dpCache; + } size_t NumLoadedTextures() const { return cache.size(); @@ -93,6 +97,8 @@ private: STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 15 frames in between.) STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail. + STATUS_DEPALETTIZE = 0x40, + STATUS_DEPALETTIZE_DIRTY = 0x80 }; // Status, but int so we can zero initialize. @@ -100,6 +106,7 @@ private: u32 addr; u32 hash; VirtualFramebuffer *framebuffer; // if null, not sourced from an FBO. + FBO *depalFBO; u32 sizeInRAM; int lastFrame; int numFrames; @@ -150,6 +157,7 @@ private: u32 GetCurrentClutHash(); void UpdateCurrentClut(); void AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch); + bool AttachFramebufferCLUT(TextureCache::TexCacheEntry *entry, VirtualFramebuffer *framebuffer, u32 address); void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); void SetTextureFramebuffer(TexCacheEntry *entry); @@ -184,5 +192,7 @@ private: int decimationCounter_; FramebufferManager *framebufferManager_; + DepalShaderCache *depalShaderCache_; }; +GLenum getClutDestFormat(GEPaletteFormat format); diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 46f94cb47b..679f65db58 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -190,6 +190,7 @@ + @@ -242,6 +243,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 8c82c34f72..b420625616 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -165,6 +165,9 @@ Common + + GLES + @@ -308,8 +311,11 @@ Common + + GLES + - \ No newline at end of file + From d0e65054a4ed4da9eb8b3f7321272dbf5aaeb944 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 00:11:01 +0100 Subject: [PATCH 02/24] A bit closer to working. Shadow visible --- GPU/GLES/DepalettizeShader.cpp | 39 ++++++++++++++++------- GPU/GLES/Framebuffer.cpp | 5 +++ GPU/GLES/Framebuffer.h | 2 ++ GPU/GLES/TextureCache.cpp | 57 ++++++++++++++++++++++++++++------ 4 files changed, 82 insertions(+), 21 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 70b7dfb300..c4bd19912b 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2014- PPSSPP Project. +// Copyright (c) 2014- PPSSPP Project. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -17,6 +17,9 @@ #include +#define SHADERLOG + +#include "base/logging.h" #include "Common/Log.h" #include "DepalettizeShader.h" #include "GPU/GPUState.h" @@ -24,6 +27,7 @@ static const char *depalVShader = "#version 100\n" +"precision highp float;\n" "// Depal shader\n" "attribute vec4 a_position;\n" "attribute vec2 a_texcoord0;\n" @@ -82,11 +86,12 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { #define WRITE p+=sprintf WRITE(p, "#version 100\n"); - WRITE(p, "varying vec2 texcoord0;\n"); + WRITE(p, "precision mediump float;\n"); + WRITE(p, "varying vec2 v_texcoord0;\n"); WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D pal;\n"); WRITE(p, "void main() {\n"); - WRITE(p, " vec4 index = texture2D(tex);\n"); + WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); char lookupMethod[128] = "index.r"; char offset[128] = ""; @@ -97,6 +102,7 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { int shift = gstate.getClutIndexShift(); int mask = gstate.getClutIndexMask(); + float multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. switch (pixelFormat) { case GE_FORMAT_8888: @@ -121,6 +127,7 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { default: case 12: strcpy(lookupMethod, "index.a"); break; } + multiplier = 1.0f / 15.0f; } else { // Ugh } @@ -133,6 +140,7 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { default: case 11: strcpy(lookupMethod, "index.b"); break; } + multiplier = 1.0f / 31.0f; } else { // Ugh } @@ -156,8 +164,13 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { sprintf(offset, " + %.0f", (float)clutBase / 255.0f); // 256? } - WRITE(p, " vec4 color = texture2D(pal, vec2(%s%s, 0.0));\n", lookupMethod, offset); - WRITE(p, " gl_Color = color;\n"); + if (true) { + + WRITE(p, " gl_FragColor = vec4(index.r);\n", lookupMethod, offset); + //WRITE(p, " gl_FragColor = vec4(index) + texture2D(pal, vec2(v_texcoord0.x, 0));\n", lookupMethod, offset); + } else { + WRITE(p, " gl_FragColor = texture2D(pal, vec2((%s * %f)%s, 0.0));\n", lookupMethod, multiplier, offset); + } WRITE(p, "}\n"); } @@ -175,7 +188,6 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { DepalTexture *tex = new DepalTexture(); glGenTextures(1, &tex->texture); - glActiveTexture(1); glBindTexture(GL_TEXTURE_2D, tex->texture); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; glTexImage2D(GL_TEXTURE_2D, 0, components, 256, 1, 0, components, dstFmt, (void *)rawClut); @@ -183,7 +195,6 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glActiveTexture(0); texCache_[clutID] = tex; return tex->texture; @@ -213,13 +224,19 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { return shader->second->program; } - char buffer[2048]; + char *buffer = new char[2048]; GenerateDepalShader(buffer, pixelFormat); - GLuint fragShader = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vertexShader_, 1, &depalVShader, 0); - glCompileShader(vertexShader_); + GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER); + + const char *buf = buffer; + glShaderSource(fragShader, 1, &buf, 0); + glCompileShader(fragShader); + + CheckShaderCompileSuccess(fragShader, buffer); + + delete[] buffer; GLuint program = glCreateProgram(); glAttachShader(program, vertexShader_); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 367fdfd703..5344f05033 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -610,6 +610,7 @@ void FramebufferManager::DrawActiveTexture(GLuint texture, float x, float y, flo shaderManager_->DirtyLastShader(); // dirty lastShader_ } + VirtualFramebuffer *FramebufferManager::GetVFBAt(u32 addr) { VirtualFramebuffer *match = NULL; for (size_t i = 0; i < vfbs_.size(); ++i) { @@ -684,6 +685,10 @@ void FramebufferManager::DestroyFramebuf(VirtualFramebuffer *v) { delete v; } +void FramebufferManager::RebindFramebuffer() { + fbo_bind_as_render_target(currentRenderVfb_->fbo); +} + void FramebufferManager::DoSetRenderFrameBuffer() { /* if (useBufferedRendering_ && currentRenderVfb_) { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 3f874e8d0d..cb9e1b22ad 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -220,6 +220,8 @@ public: bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); + void RebindFramebuffer(); + private: void CompileDraw2DProgram(); void DestroyDraw2DProgram(); diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 6be2a546e6..b4e4805adb 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -915,30 +915,67 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { entry->framebuffer->usageFlags |= FB_USAGE_TEXTURE; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) { - framebufferManager_->BindFramebufferColor(entry->framebuffer); - if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) { - GLuint program = depalShaderCache_->GetDepalettizeShader(entry->framebuffer->format); - glUseProgram(program); - // Check if we can handle the current setup GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBufConverted_); - glActiveTexture(1); - glBindTexture(GL_TEXTURE_2D, clutTexture); - glActiveTexture(0); if (!entry->depalFBO) { entry->depalFBO = fbo_create(entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight, 1, false, FBO_8888); } fbo_bind_as_render_target(entry->depalFBO); + glViewport(0, 0, entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight); - // ... + static const float pos[12] = { + -1, -1, -1, + 1, -1, -1, + 1, 1, -1, + -1, 1, -1 + }; + static const float uv[8] = { + 0, 0, + 1, 0, + 1, 1, + 0, 1, + }; + static const GLubyte indices[4] = { 0, 1, 3, 2 }; + GLuint program = depalShaderCache_->GetDepalettizeShader(entry->framebuffer->format); + glUseProgram(program); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, clutTexture); + glActiveTexture(GL_TEXTURE0); + + framebufferManager_->BindFramebufferColor(entry->framebuffer); + + glstate.blend.disable(); + glstate.colorMask.set(true, true, true, true); + glstate.scissorTest.disable(); + glstate.cullFace.disable(); + glstate.depthTest.disable(); + glstate.viewport.set(0, 0, entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight); + + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 12, pos); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 8, uv); + glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices); + + /* + glDisableVertexAttribArray(0); + glDisableVertexAttribArray(1); + */ fbo_bind_color_as_texture(entry->depalFBO, 0); + glstate.Restore(); + framebufferManager_->RebindFramebuffer(); + } else { + framebufferManager_->BindFramebufferColor(entry->framebuffer); } - // Keep the framebuffer alive. entry->framebuffer->last_frame_used = gpuStats.numFlips; From f517fa4ac8715041a5a1f52cb47b3d5efa9e631d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 01:06:11 +0100 Subject: [PATCH 03/24] Fix typo-bug when setting sampler uniforms, tweak fudge factors. --- GPU/GLES/DepalettizeShader.cpp | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index c4bd19912b..6b9499830b 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -127,7 +127,7 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { default: case 12: strcpy(lookupMethod, "index.a"); break; } - multiplier = 1.0f / 15.0f; + multiplier = (1.0f / 16.0f) * 255.0f / 256.0f; // Need the fudge factor to not "wrap"... } else { // Ugh } @@ -135,12 +135,11 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { case GE_FORMAT_565: if ((mask & 0x3f) == 0x3F) { switch (shift) { // bgra? - case 0: strcpy(lookupMethod, "index.r"); break; - case 5: strcpy(lookupMethod, "index.g"); break; + case 0: strcpy(lookupMethod, "index.r"); multiplier = 1.0f / 32.0f; break; + case 5: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 64.0f; break; default: - case 11: strcpy(lookupMethod, "index.b"); break; + case 11: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; } - multiplier = 1.0f / 31.0f; } else { // Ugh } @@ -148,11 +147,11 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { case GE_FORMAT_5551: if ((mask & 0x1F) == 0x1F) { switch (shift) { // bgra? - case 0: strcpy(lookupMethod, "index.r"); break; - case 4: strcpy(lookupMethod, "index.g"); break; - case 8: strcpy(lookupMethod, "index.b"); break; + case 0: strcpy(lookupMethod, "index.r"); multiplier = 1.0f / 32.0f; break; + case 4: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 32.0f; break; + case 8: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; default: - case 15: strcpy(lookupMethod, "index.a"); break; + case 15: strcpy(lookupMethod, "index.a"); multiplier = 1.0f / 128.0f; break; } } else { // Ugh @@ -164,13 +163,7 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { sprintf(offset, " + %.0f", (float)clutBase / 255.0f); // 256? } - if (true) { - - WRITE(p, " gl_FragColor = vec4(index.r);\n", lookupMethod, offset); - //WRITE(p, " gl_FragColor = vec4(index) + texture2D(pal, vec2(v_texcoord0.x, 0));\n", lookupMethod, offset); - } else { - WRITE(p, " gl_FragColor = texture2D(pal, vec2((%s * %f)%s, 0.0));\n", lookupMethod, multiplier, offset); - } + WRITE(p, " gl_FragColor = texture2D(pal, vec2((%s * %f)%s, 0.0));\n", lookupMethod, multiplier, offset); WRITE(p, "}\n"); } @@ -191,6 +184,7 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { glBindTexture(GL_TEXTURE_2D, tex->texture); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; glTexImage2D(GL_TEXTURE_2D, 0, components, 256, 1, 0, components, dstFmt, (void *)rawClut); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -251,8 +245,8 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { GLint u_tex = glGetUniformLocation(program, "tex"); GLint u_pal = glGetUniformLocation(program, "pal"); - glUniform1d(u_tex, 0); - glUniform1d(u_pal, 1); + glUniform1i(u_tex, 0); + glUniform1i(u_pal, 1); GLint linkStatus = GL_FALSE; glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); From ee150fadbb2fb60d25854f9e46dbead509675b97 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 01:11:15 +0100 Subject: [PATCH 04/24] Update CMake and Android build --- CMakeLists.txt | 2 ++ GPU/GLES/DepalettizeShader.cpp | 2 -- android/jni/Android.mk | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f3685a05fd..f68da258c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1243,6 +1243,8 @@ add_library(GPU OBJECT GPU/Debugger/Breakpoints.h GPU/Debugger/Stepping.cpp GPU/Debugger/Stepping.h + GPU/GLES/DepalettizeShader.cpp + GPU/GLES/DepalettizeShader.h GPU/GLES/GLES_GPU.cpp GPU/GLES/GLES_GPU.h GPU/GLES/FragmentShaderGenerator.cpp diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 6b9499830b..9a4620fa6c 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -17,8 +17,6 @@ #include -#define SHADERLOG - #include "base/logging.h" #include "Common/Log.h" #include "DepalettizeShader.h" diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 711e9368fe..8bd8a9abaa 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -144,6 +144,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/Debugger/Breakpoints.cpp \ $(SRC)/GPU/Debugger/Stepping.cpp \ $(SRC)/GPU/GLES/Framebuffer.cpp \ + $(SRC)/GPU/GLES/DepalettizeShader.cpp \ $(SRC)/GPU/GLES/GLES_GPU.cpp.arm \ $(SRC)/GPU/GLES/TextureCache.cpp.arm \ $(SRC)/GPU/GLES/TransformPipeline.cpp.arm \ From 00c24ad3fcb248fbd7ea6b032ca77153c9b73c09 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 01:24:23 +0100 Subject: [PATCH 05/24] Support aligned 4-bit accesses to 32-bit framebuffers --- GPU/GLES/DepalettizeShader.cpp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 9a4620fa6c..e4bce40361 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -83,14 +83,6 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { char *p = buffer; #define WRITE p+=sprintf - WRITE(p, "#version 100\n"); - WRITE(p, "precision mediump float;\n"); - WRITE(p, "varying vec2 v_texcoord0;\n"); - WRITE(p, "uniform sampler2D tex;\n"); - WRITE(p, "uniform sampler2D pal;\n"); - WRITE(p, "void main() {\n"); - WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); - char lookupMethod[128] = "index.r"; char offset[128] = ""; @@ -104,13 +96,16 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { // pixelformat is the format of the texture we are sampling. switch (pixelFormat) { case GE_FORMAT_8888: - if (mask == 0xFF) { + if ((mask & 0xF) == 0xF) { switch (shift) { // bgra? case 0: strcpy(lookupMethod, "index.r"); break; + case 4: strcpy(lookupMethod, "index.r"); multiplier = (1.0f / 16.0f); break; case 8: strcpy(lookupMethod, "index.g"); break; + case 12: strcpy(lookupMethod, "index.g"); multiplier = (1.0f / 16.0f); break; case 16: strcpy(lookupMethod, "index.b"); break; - default: + case 20: strcpy(lookupMethod, "index.b"); multiplier = (1.0f / 16.0f); break; case 24: strcpy(lookupMethod, "index.a"); break; + case 28: strcpy(lookupMethod, "index.a"); multiplier = (1.0f / 16.0f); break; } } else { // Ugh @@ -122,7 +117,6 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { case 0: strcpy(lookupMethod, "index.r"); break; case 4: strcpy(lookupMethod, "index.g"); break; case 8: strcpy(lookupMethod, "index.b"); break; - default: case 12: strcpy(lookupMethod, "index.a"); break; } multiplier = (1.0f / 16.0f) * 255.0f / 256.0f; // Need the fudge factor to not "wrap"... @@ -135,7 +129,6 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { switch (shift) { // bgra? case 0: strcpy(lookupMethod, "index.r"); multiplier = 1.0f / 32.0f; break; case 5: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 64.0f; break; - default: case 11: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; } } else { @@ -148,7 +141,6 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { case 0: strcpy(lookupMethod, "index.r"); multiplier = 1.0f / 32.0f; break; case 4: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 32.0f; break; case 8: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; - default: case 15: strcpy(lookupMethod, "index.a"); multiplier = 1.0f / 128.0f; break; } } else { @@ -161,6 +153,13 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { sprintf(offset, " + %.0f", (float)clutBase / 255.0f); // 256? } + WRITE(p, "#version 100\n"); + WRITE(p, "precision mediump float;\n"); + WRITE(p, "varying vec2 v_texcoord0;\n"); + WRITE(p, "uniform sampler2D tex;\n"); + WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "void main() {\n"); + WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); WRITE(p, " gl_FragColor = texture2D(pal, vec2((%s * %f)%s, 0.0));\n", lookupMethod, multiplier, offset); WRITE(p, "}\n"); } From d414327da7814bbd6556183c8d743066ba038585 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 11:43:23 +0200 Subject: [PATCH 06/24] Use a small offset to turn NEAREST into floor() when sampling the clut. Minor fixes and cleanups. --- GPU/GLES/DepalettizeShader.cpp | 29 ++++++++++---------- GPU/GLES/TextureCache.cpp | 50 +++++----------------------------- 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index e4bce40361..6281477207 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -119,7 +119,7 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { case 8: strcpy(lookupMethod, "index.b"); break; case 12: strcpy(lookupMethod, "index.a"); break; } - multiplier = (1.0f / 16.0f) * 255.0f / 256.0f; // Need the fudge factor to not "wrap"... + multiplier = 1.0f / 16.0f; } else { // Ugh } @@ -149,9 +149,8 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { break; } - if (clutBase != 0) { - sprintf(offset, " + %.0f", (float)clutBase / 255.0f); // 256? - } + // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. + sprintf(offset, " + %f", (float)clutBase / 255.0f - 0.5f / 256.0f); // 256? WRITE(p, "#version 100\n"); WRITE(p, "precision mediump float;\n"); @@ -192,14 +191,14 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { } void DepalShaderCache::Clear() { - for (auto shader : cache_) { - glDeleteShader(shader.second->fragShader); - glDeleteProgram(shader.second->program); - delete shader.second; + for (auto shader = cache_.begin(); shader != cache_.end(); ++shader) { + glDeleteShader(shader->second->fragShader); + glDeleteProgram(shader->second->program); + delete shader->second; } - for (auto tex : texCache_) { - glDeleteTextures(1, &tex.second->texture); - delete tex.second; + for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) { + glDeleteTextures(1, &tex->second->texture); + delete tex->second; } } @@ -227,8 +226,6 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { CheckShaderCompileSuccess(fragShader, buffer); - delete[] buffer; - GLuint program = glCreateProgram(); glAttachShader(program, vertexShader_); glAttachShader(program, fragShader); @@ -256,6 +253,8 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { ERROR_LOG(G3D, "Could not link program:\n %s", buf); delete[] buf; // we're dead! } + + delete[] buffer; return 0; } @@ -264,6 +263,8 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { depal->fragShader = fragShader; cache_[id] = depal; - + + delete[] buffer; + return depal->program; } diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index b4e4805adb..0151fce75c 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -750,38 +750,6 @@ static inline u32 MiniHash(const u32 *ptr) { return ptr[0]; } -// TODO: Unused, remove? -static inline u32 QuickClutHash(const u8 *clut, u32 bytes) { - // CLUTs always come in multiples of 32 bytes, can't load them any other way. - _dbg_assert_msg_(G3D, (bytes & 31) == 0, "CLUT should always have a multiple of 32 bytes."); - - const u32 prime = 2246822519U; - u32 hash = 0; -#ifdef _M_SSE - if ((((u32)(intptr_t)clut) & 0xf) == 0) { - __m128i cursor = _mm_set1_epi32(0); - const __m128i mult = _mm_set1_epi32(prime); - const __m128i *p = (const __m128i *)clut; - for (u32 i = 0; i < bytes / 16; ++i) { - cursor = _mm_add_epi32(cursor, _mm_mul_epu32(_mm_load_si128(&p[i]), mult)); - } - // Add the four parts into the low i32. - cursor = _mm_add_epi32(cursor, _mm_srli_si128(cursor, 8)); - cursor = _mm_add_epi32(cursor, _mm_srli_si128(cursor, 4)); - hash = _mm_cvtsi128_si32(cursor); - } else { -#else - // TODO: ARM NEON implementation (using CPUDetect to be sure it has NEON.) - { -#endif - for (const u32 *p = (u32 *)clut, *end = (u32 *)(clut + bytes); p < end; ) { - hash += *p++ * prime; - } - } - - return hash; -} - static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat format) { const u32 sizeInRAM = (textureBitsPerPixel[format] * bufw * h) / 8; const u32 *checkp = (const u32 *) Memory::GetPointer(addr); @@ -916,16 +884,11 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) { if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) { - // Check if we can handle the current setup - GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBufConverted_); - if (!entry->depalFBO) { entry->depalFBO = fbo_create(entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight, 1, false, FBO_8888); } fbo_bind_as_render_target(entry->depalFBO); - glViewport(0, 0, entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight); - static const float pos[12] = { -1, -1, -1, 1, -1, -1, @@ -954,12 +917,13 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { framebufferManager_->BindFramebufferColor(entry->framebuffer); - glstate.blend.disable(); - glstate.colorMask.set(true, true, true, true); - glstate.scissorTest.disable(); - glstate.cullFace.disable(); - glstate.depthTest.disable(); - glstate.viewport.set(0, 0, entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight); + glDisable(GL_BLEND); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glViewport(0, 0, entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 12, pos); glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 8, uv); From 634add3a46561c6cb7fe255dde3ac926093ab3d7 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 15:37:51 +0200 Subject: [PATCH 07/24] Depalettize works at other resolutions than 1x. For some strange reason though if you change resolution in game it breaks. --- GPU/GLES/DepalettizeShader.cpp | 50 ++++++++++++++++++++++++++-------- GPU/GLES/GLES_GPU.cpp | 3 ++ GPU/GLES/TextureCache.cpp | 4 +-- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 6281477207..6d8174d1d7 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -23,10 +23,15 @@ #include "GPU/GPUState.h" #include "GPU/GLES/TextureCache.h" -static const char *depalVShader = +#ifdef _WIN32 +#define SHADERLOG +#endif + +static const char *depalVShader100 = +#ifdef USING_GLES "#version 100\n" "precision highp float;\n" -"// Depal shader\n" +#endif "attribute vec4 a_position;\n" "attribute vec2 a_texcoord0;\n" "varying vec2 v_texcoord0;\n" @@ -35,6 +40,21 @@ static const char *depalVShader = " gl_Position = a_position;\n" "}\n"; +static const char *depalVShader300 = +#ifdef USING_GLES +"#version 300 es\n" +"precision highp float;\n" +#else +"#version 330\n" +#endif +"in vec4 a_position;\n" +"in vec2 a_texcoord0;\n" +"out vec2 v_texcoord0;\n" +"void main() {\n" +" v_texcoord0 = a_texcoord0;\n" +" gl_Position = a_position;\n" +"}\n"; + static bool CheckShaderCompileSuccess(GLuint shader, const char *code) { GLint success; @@ -66,10 +86,10 @@ static bool CheckShaderCompileSuccess(GLuint shader, const char *code) { DepalShaderCache::DepalShaderCache() { // Pre-build the vertex program vertexShader_ = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vertexShader_, 1, &depalVShader, 0); + glShaderSource(vertexShader_, 1, &depalVShader100, 0); glCompileShader(vertexShader_); - if (CheckShaderCompileSuccess(vertexShader_, depalVShader)) { + if (CheckShaderCompileSuccess(vertexShader_, depalVShader100)) { // ... } } @@ -79,7 +99,7 @@ DepalShaderCache::~DepalShaderCache() { glDeleteShader(vertexShader_); } -void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { +void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { char *p = buffer; #define WRITE p+=sprintf @@ -150,10 +170,14 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) { } // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. - sprintf(offset, " + %f", (float)clutBase / 255.0f - 0.5f / 256.0f); // 256? + sprintf(offset, " + %f", (float)clutBase / 256.0f - 0.5f / 256.0f); +#ifdef USING_GLES WRITE(p, "#version 100\n"); WRITE(p, "precision mediump float;\n"); +#else + WRITE(p, "#version 110\n"); +#endif WRITE(p, "varying vec2 v_texcoord0;\n"); WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D pal;\n"); @@ -216,7 +240,7 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { char *buffer = new char[2048]; - GenerateDepalShader(buffer, pixelFormat); + GenerateDepalShader100(buffer, pixelFormat); GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER); @@ -248,10 +272,14 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { GLint bufLength = 0; glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength); if (bufLength) { - char* buf = new char[bufLength]; - glGetProgramInfoLog(program, bufLength, NULL, buf); - ERROR_LOG(G3D, "Could not link program:\n %s", buf); - delete[] buf; // we're dead! + char* errorbuf = new char[bufLength]; + glGetProgramInfoLog(program, bufLength, NULL, errorbuf); +#ifdef SHADERLOG + OutputDebugStringUTF8(buffer); + OutputDebugStringUTF8(errorbuf); +#endif + ERROR_LOG(G3D, "Could not link program:\n %s \n\n %s", errorbuf, buf); + delete[] errorbuf; // we're dead! } delete[] buffer; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 5c16584452..af47aad550 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -460,6 +460,7 @@ GLES_GPU::GLES_GPU() GLES_GPU::~GLES_GPU() { framebufferManager_.DestroyAllFBOs(); shaderManager_->ClearCache(true); + depalShaderCache_.Clear(); delete shaderManager_; } @@ -496,6 +497,7 @@ void GLES_GPU::DeviceLost() { // TransformDraw has registered as a GfxResourceHolder. shaderManager_->ClearCache(false); textureCache_.Clear(false); + depalShaderCache_.Clear(); framebufferManager_.DeviceLost(); } @@ -2070,6 +2072,7 @@ void GLES_GPU::DoState(PointerWrap &p) { // In Freeze-Frame mode, we don't want to do any of this. if (p.mode == p.MODE_READ && !PSP_CoreParameter().frozen) { textureCache_.Clear(true); + depalShaderCache_.Clear(); transformDraw_.ClearTrackedVertexArrays(); gstate_c.textureChanged = TEXCHANGE_UPDATED; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 0151fce75c..387ff7925b 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -886,7 +886,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) { GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBufConverted_); if (!entry->depalFBO) { - entry->depalFBO = fbo_create(entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight, 1, false, FBO_8888); + entry->depalFBO = fbo_create(entry->framebuffer->renderWidth, entry->framebuffer->renderHeight, 1, false, FBO_8888); } fbo_bind_as_render_target(entry->depalFBO); static const float pos[12] = { @@ -923,7 +923,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { glDisable(GL_CULL_FACE); glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); - glViewport(0, 0, entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight); + glViewport(0, 0, entry->framebuffer->renderWidth, entry->framebuffer->renderHeight); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 12, pos); glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 8, uv); From 2d31dd5e737b875214ffb4c703ef3fdd8b89caef Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 17:08:24 +0200 Subject: [PATCH 08/24] Try to fix Qt build --- Qt/Core.pro | 1 + 1 file changed, 1 insertion(+) diff --git a/Qt/Core.pro b/Qt/Core.pro index 454d257a09..5d0bd6befb 100755 --- a/Qt/Core.pro +++ b/Qt/Core.pro @@ -46,6 +46,7 @@ SOURCES += $$P/Core/*.cpp \ # Core $$P/GPU/GPUState.cpp \ $$P/GPU/Math3D.cpp \ $$P/GPU/Null/NullGpu.cpp \ + $$P/GPU/GLES/DepalettizeShader.cpp \ $$P/GPU/GLES/FragmentShaderGenerator.cpp \ $$P/GPU/GLES/Framebuffer.cpp \ $$P/GPU/GLES/GLES_GPU.cpp \ From 8b3f317bbfcbb401e4723f03799ddee0e54667cc Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 30 Mar 2014 17:37:15 +0200 Subject: [PATCH 09/24] Typo fixes, thanks raven02. Fix deinitialization crashes. --- GPU/GLES/DepalettizeShader.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 6d8174d1d7..17c8c315ea 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -19,6 +19,7 @@ #include "base/logging.h" #include "Common/Log.h" +#include "Core/Reporting.h" #include "DepalettizeShader.h" #include "GPU/GPUState.h" #include "GPU/GLES/TextureCache.h" @@ -128,7 +129,7 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { case 28: strcpy(lookupMethod, "index.a"); multiplier = (1.0f / 16.0f); break; } } else { - // Ugh + ERROR_LOG_ONCE(depal8888, G3D, "8888 depal unsupported: %i %02x", shift, mask); } break; case GE_FORMAT_4444: @@ -141,7 +142,7 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { } multiplier = 1.0f / 16.0f; } else { - // Ugh + ERROR_LOG_ONCE(depal4444, G3D, "4444 depal unsupported: %i %02x", shift, mask); } break; case GE_FORMAT_565: @@ -152,19 +153,19 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { case 11: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; } } else { - // Ugh + ERROR_LOG_ONCE(depal565, G3D, "565 depal unsupported: %i %02x", shift, mask); } break; case GE_FORMAT_5551: if ((mask & 0x1F) == 0x1F) { switch (shift) { // bgra? case 0: strcpy(lookupMethod, "index.r"); multiplier = 1.0f / 32.0f; break; - case 4: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 32.0f; break; - case 8: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; - case 15: strcpy(lookupMethod, "index.a"); multiplier = 1.0f / 128.0f; break; + case 5: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 32.0f; break; + case 10: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; + case 15: strcpy(lookupMethod, "index.a"); multiplier = 1.0f / 256.0f; break; } } else { - // Ugh + ERROR_LOG_ONCE(depal5551, G3D, "5551 depal unsupported: %i %02x", shift, mask); } break; } @@ -220,10 +221,12 @@ void DepalShaderCache::Clear() { glDeleteProgram(shader->second->program); delete shader->second; } + cache_.clear(); for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) { glDeleteTextures(1, &tex->second->texture); delete tex->second; } + texCache_.clear(); } void DepalShaderCache::Decimate() { From b82de69a2d18b63ee631b401375c7974a60238ba Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 31 Mar 2014 00:30:40 +0200 Subject: [PATCH 10/24] depal: slightly better error reporting. --- GPU/GLES/DepalettizeShader.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 17c8c315ea..c905f9b20c 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -115,6 +115,7 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { float multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. + bool formatOK = true; switch (pixelFormat) { case GE_FORMAT_8888: if ((mask & 0xF) == 0xF) { @@ -127,9 +128,11 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { case 20: strcpy(lookupMethod, "index.b"); multiplier = (1.0f / 16.0f); break; case 24: strcpy(lookupMethod, "index.a"); break; case 28: strcpy(lookupMethod, "index.a"); multiplier = (1.0f / 16.0f); break; + default: + formatOK = false; } } else { - ERROR_LOG_ONCE(depal8888, G3D, "8888 depal unsupported: %i %02x", shift, mask); + formatOK = false; } break; case GE_FORMAT_4444: @@ -139,10 +142,12 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { case 4: strcpy(lookupMethod, "index.g"); break; case 8: strcpy(lookupMethod, "index.b"); break; case 12: strcpy(lookupMethod, "index.a"); break; + default: + formatOK = false; } multiplier = 1.0f / 16.0f; } else { - ERROR_LOG_ONCE(depal4444, G3D, "4444 depal unsupported: %i %02x", shift, mask); + formatOK = false; } break; case GE_FORMAT_565: @@ -151,9 +156,11 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { case 0: strcpy(lookupMethod, "index.r"); multiplier = 1.0f / 32.0f; break; case 5: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 64.0f; break; case 11: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; + default: + formatOK = false; } } else { - ERROR_LOG_ONCE(depal565, G3D, "565 depal unsupported: %i %02x", shift, mask); + formatOK = false; } break; case GE_FORMAT_5551: @@ -163,13 +170,19 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { case 5: strcpy(lookupMethod, "index.g"); multiplier = 1.0f / 32.0f; break; case 10: strcpy(lookupMethod, "index.b"); multiplier = 1.0f / 32.0f; break; case 15: strcpy(lookupMethod, "index.a"); multiplier = 1.0f / 256.0f; break; + default: + formatOK = false; } } else { - ERROR_LOG_ONCE(depal5551, G3D, "5551 depal unsupported: %i %02x", shift, mask); + formatOK = false; } break; } + if (!formatOK) { + ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x", pixelFormat, shift, mask); + } + // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. sprintf(offset, " + %f", (float)clutBase / 256.0f - 0.5f / 256.0f); From c3524246de069bd9a6d7c2a31333ca65960cf122 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 1 Apr 2014 11:01:45 +0200 Subject: [PATCH 11/24] Add GLES3 path to depalettization. Doesn't seem to help much though, must be missing something. --- GPU/GLES/DepalettizeShader.cpp | 80 ++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index c905f9b20c..5d0b7c42a7 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -80,14 +80,19 @@ static bool CheckShaderCompileSuccess(GLuint shader, const char *code) { return false; } else { DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code); +#ifdef SHADERLOG + OutputDebugStringUTF8(code); +#endif return true; } } DepalShaderCache::DepalShaderCache() { // Pre-build the vertex program + bool useGL3 = gl_extensions.GLES3; + vertexShader_ = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vertexShader_, 1, &depalVShader100, 0); + glShaderSource(vertexShader_, 1, useGL3 ? &depalVShader300 : &depalVShader100, 0); glCompileShader(vertexShader_); if (CheckShaderCompileSuccess(vertexShader_, depalVShader100)) { @@ -100,9 +105,65 @@ DepalShaderCache::~DepalShaderCache() { glDeleteShader(vertexShader_); } +#define WRITE p+=sprintf + +void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { + char *p = buffer; +#ifdef USING_GLES + WRITE(p, "#version 300 es\n"); + WRITE(p, "precision mediump float;\n"); +#else + WRITE(p, "#version 330\n"); +#endif + WRITE(p, "in vec2 v_texcoord0;\n"); + WRITE(p, "out vec4 fragColor0;\n"); + WRITE(p, "uniform sampler2D tex;\n"); + WRITE(p, "uniform sampler2D pal;\n"); + + WRITE(p, "void main() {\n"); + WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); + + // Unfortunately sampling turned our texture into floating point. To avoid this, might be able + // to declare them as isampler2D objects, but these require integer textures, which needs more work. + // Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision. + // Use the mask to skip reading some components. + int shiftedMask = gstate.getClutIndexMask() << gstate.getClutIndexShift(); + switch (pixelFormat) { + case GE_FORMAT_8888: + if (shiftedMask & 0xFF) WRITE(p, " int r = int(index.r * 15.99);\n"); else WRITE(p, " int r = 0;\n"); + if (shiftedMask & 0xFF00) WRITE(p, " int g = int(index.g * 15.99);\n"); else WRITE(p, " int g = 0;\n"); + if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(index.b * 15.99);\n"); else WRITE(p, " int b = 0;\n"); + if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(index.a * 15.99);\n"); else WRITE(p, " int a = 0;\n"); + WRITE(p, " int color = (a << 24) | (b << 16) | (g << 8) | (r);\n"); + break; + case GE_FORMAT_4444: + if (shiftedMask & 0xF) WRITE(p, " int r = int(index.r * 15.99);\n"); else WRITE(p, " int r = 0;\n"); + if (shiftedMask & 0xF0) WRITE(p, " int g = int(index.g * 15.99);\n"); else WRITE(p, " int g = 0;\n"); + if (shiftedMask & 0xF00) WRITE(p, " int b = int(index.b * 15.99);\n"); else WRITE(p, " int b = 0;\n"); + if (shiftedMask & 0xF000) WRITE(p, " int a = int(index.a * 15.99);\n"); else WRITE(p, " int a = 0;\n"); + WRITE(p, " int color = (a << 12) | (b << 8) | (g << 4) | (r);\n"); + break; + case GE_FORMAT_565: + if (shiftedMask & 0x1F) WRITE(p, " int r = int(index.r * 31.99);\n"); else WRITE(p, " int r = 0;\n"); + if (shiftedMask & 0x7E0) WRITE(p, " int g = int(index.g * 63.99);\n"); else WRITE(p, " int g = 0;\n"); + if (shiftedMask & 0xF800) WRITE(p, " int b = int(index.b * 31.99);\n"); else WRITE(p, " int b = 0;\n"); + WRITE(p, " int color = (b << 11) | (g << 5) | (r);"); + break; + case GE_FORMAT_5551: + if (shiftedMask & 0x1F) WRITE(p, " int r = int(index.r * 31.99);\n"); else WRITE(p, " int r = 0;\n"); + if (shiftedMask & 0x3E0) WRITE(p, " int g = int(index.g * 31.99);\n"); else WRITE(p, " int g = 0;\n"); + if (shiftedMask & 0x7C00) WRITE(p, " int b = int(index.b * 31.99);\n"); else WRITE(p, " int b = 0;\n"); + if (shiftedMask & 0xF800) WRITE(p, " int a = int(index.a);\n"); else WRITE(p, " int a = 0;\n"); + WRITE(p, "int color = (a << 15) | (b << 10) | (g << 5) | (r);"); + break; + } + WRITE(p, " color = (color >> %i) & 0x%02x;\n", gstate.getClutIndexShift(), gstate.getClutIndexMask()); + WRITE(p, " fragColor0 = texture2D(pal, vec2(float(color) / 256.0f, 0.0));\n"); + WRITE(p, "}\n"); +} + void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { char *p = buffer; -#define WRITE p+=sprintf char lookupMethod[128] = "index.r"; char offset[128] = ""; @@ -201,6 +262,9 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { WRITE(p, "}\n"); } +#undef WRITE + + u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) { return (gstate.clutformat & 0xFFFFFF) | (pixelFormat << 24); } @@ -249,6 +313,8 @@ void DepalShaderCache::Decimate() { GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { u32 id = GenerateShaderID(pixelFormat); + bool useGL3 = gl_extensions.GLES3; + auto shader = cache_.find(id); if (shader != cache_.end()) { return shader->second->program; @@ -256,7 +322,11 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { char *buffer = new char[2048]; - GenerateDepalShader100(buffer, pixelFormat); + if (useGL3) { + GenerateDepalShader300(buffer, pixelFormat); + } else { + GenerateDepalShader100(buffer, pixelFormat); + } GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER); @@ -273,6 +343,10 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { glBindAttribLocation(program, 0, "a_position"); glBindAttribLocation(program, 1, "a_texcoord0"); + if (useGL3) { + glBindFragDataLocation(program, 0, "fragColor0"); + } + glLinkProgram(program); glUseProgram(program); From 3f16765f1ca3486979bcade905f32024e709bfc1 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 1 Apr 2014 11:39:19 +0200 Subject: [PATCH 12/24] Allow 512 entries in 16-bit CLUTs. Also, forgot offset. --- GPU/GLES/DepalettizeShader.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 5d0b7c42a7..5f76eb0b68 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -157,8 +157,13 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { WRITE(p, "int color = (a << 15) | (b << 10) | (g << 5) | (r);"); break; } - WRITE(p, " color = (color >> %i) & 0x%02x;\n", gstate.getClutIndexShift(), gstate.getClutIndexMask()); - WRITE(p, " fragColor0 = texture2D(pal, vec2(float(color) / 256.0f, 0.0));\n"); + float texturePixels = 256; + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + if (clutFormat != GE_CMODE_32BIT_ABGR8888) + texturePixels = 512; + + WRITE(p, " color = ((color >> %i) & 0x%02x) + %i;\n", gstate.getClutIndexShift(), gstate.getClutIndexMask(), gstate.getClutIndexStartPos()); + WRITE(p, " fragColor0 = texture2D(pal, vec2(float(color) / %f, 0.0));\n", texturePixels); WRITE(p, "}\n"); } @@ -240,12 +245,18 @@ void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) { break; } + float texturePixels = 256.f; + if (clutFormat != GE_CMODE_32BIT_ABGR8888) { + texturePixels = 512.f; + multiplier *= 0.5f; + } + if (!formatOK) { - ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x", pixelFormat, shift, mask); + ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%i", pixelFormat, shift, mask, offset); } // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. - sprintf(offset, " + %f", (float)clutBase / 256.0f - 0.5f / 256.0f); + sprintf(offset, " + %f", (float)clutBase / texturePixels - 0.5f / texturePixels); #ifdef USING_GLES WRITE(p, "#version 100\n"); @@ -275,13 +286,14 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { return oldtex->second->texture; } - GLuint dstFmt = getClutDestFormat(gstate.getClutPaletteFormat()); + GEPaletteFormat palFormat = gstate.getClutPaletteFormat(); + GLuint dstFmt = getClutDestFormat(palFormat); DepalTexture *tex = new DepalTexture(); glGenTextures(1, &tex->texture); glBindTexture(GL_TEXTURE_2D, tex->texture); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; - glTexImage2D(GL_TEXTURE_2D, 0, components, 256, 1, 0, components, dstFmt, (void *)rawClut); + glTexImage2D(GL_TEXTURE_2D, 0, components, palFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512, 1, 0, components, dstFmt, (void *)rawClut); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); From 7062da048345d5ba77e312ed32ef7c1ef993dfda Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 1 Apr 2014 14:14:56 +0200 Subject: [PATCH 13/24] Fix some more issues. The snow scene in FF Type 0 now looks B/W instead of broken. --- GPU/GLES/DepalettizeShader.cpp | 22 ++++++++----- GPU/GLES/TextureCache.cpp | 56 +++++++++++++++++++++++++--------- GPU/GLES/TextureCache.h | 5 +-- 3 files changed, 59 insertions(+), 24 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 5f76eb0b68..882ee59ade 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -123,17 +123,21 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { WRITE(p, "void main() {\n"); WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); + int mask = gstate.getClutIndexMask(); + int shift = gstate.getClutIndexShift(); + int offset = gstate.getClutIndexStartPos(); + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); // Unfortunately sampling turned our texture into floating point. To avoid this, might be able // to declare them as isampler2D objects, but these require integer textures, which needs more work. // Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision. // Use the mask to skip reading some components. - int shiftedMask = gstate.getClutIndexMask() << gstate.getClutIndexShift(); + int shiftedMask = mask << shift; switch (pixelFormat) { case GE_FORMAT_8888: - if (shiftedMask & 0xFF) WRITE(p, " int r = int(index.r * 15.99);\n"); else WRITE(p, " int r = 0;\n"); - if (shiftedMask & 0xFF00) WRITE(p, " int g = int(index.g * 15.99);\n"); else WRITE(p, " int g = 0;\n"); - if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(index.b * 15.99);\n"); else WRITE(p, " int b = 0;\n"); - if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(index.a * 15.99);\n"); else WRITE(p, " int a = 0;\n"); + if (shiftedMask & 0xFF) WRITE(p, " int r = int(index.r * 255.99);\n"); else WRITE(p, " int r = 0;\n"); + if (shiftedMask & 0xFF00) WRITE(p, " int g = int(index.g * 255.99);\n"); else WRITE(p, " int g = 0;\n"); + if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(index.b * 255.99);\n"); else WRITE(p, " int b = 0;\n"); + if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(index.a * 255.99);\n"); else WRITE(p, " int a = 0;\n"); WRITE(p, " int color = (a << 24) | (b << 16) | (g << 8) | (r);\n"); break; case GE_FORMAT_4444: @@ -158,12 +162,11 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { break; } float texturePixels = 256; - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); if (clutFormat != GE_CMODE_32BIT_ABGR8888) texturePixels = 512; - WRITE(p, " color = ((color >> %i) & 0x%02x) + %i;\n", gstate.getClutIndexShift(), gstate.getClutIndexMask(), gstate.getClutIndexStartPos()); - WRITE(p, " fragColor0 = texture2D(pal, vec2(float(color) / %f, 0.0));\n", texturePixels); + WRITE(p, " color = ((color >> %i) & 0x%02x) | %i;\n", shift, mask, offset); // '|' matches what we have in gstate.h + WRITE(p, " fragColor0 = texture2D(pal, vec2((floor(float(color)) - 0.5) * (1.0 / %f), 0.0));\n", texturePixels); WRITE(p, "}\n"); } @@ -356,7 +359,10 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { glBindAttribLocation(program, 1, "a_texcoord0"); if (useGL3) { + // This call is not really necessary, I think. +#ifndef MOBILE_DEVICE glBindFragDataLocation(program, 0, "fragColor0"); +#endif } glLinkProgram(program); diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 387ff7925b..dcf8bb80bf 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -73,8 +73,16 @@ TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false tmpTexBuf32.resize(1024 * 512); // 2MB tmpTexBuf16.resize(1024 * 512); // 1MB tmpTexBufRearrange.resize(1024 * 512); // 2MB + + // Aren't these way too big? clutBufConverted_ = (u32 *)AllocateAlignedMemory(4096 * sizeof(u32), 16); // 16KB clutBufRaw_ = (u32 *)AllocateAlignedMemory(4096 * sizeof(u32), 16); // 16KB + + // Zap these so that reads from uninitialized parts of the CLUT look the same in + // release and debug + memset(clutBufConverted_, 0, 4096 * sizeof(u32)); + memset(clutBufRaw_, 0, 4096 * sizeof(u32)); + glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyLevel); SetupTextureDecoder(); } @@ -208,6 +216,7 @@ inline void AttachFramebufferValid(T &entry, VirtualFramebuffer *framebuffer) { if (hasInvalidFramebuffer || hasOlderFramebuffer) { entry->framebuffer = framebuffer; entry->invalidHint = 0; + entry->status &= ~TextureCache::TexCacheEntry::STATUS_DEPALETTIZE; host->GPUNotifyTextureAttachment(entry->addr); } } @@ -217,6 +226,7 @@ inline void AttachFramebufferInvalid(T &entry, VirtualFramebuffer *framebuffer) if (entry->framebuffer == 0 || entry->framebuffer == framebuffer) { entry->framebuffer = framebuffer; entry->invalidHint = -1; + entry->status &= ~TextureCache::TexCacheEntry::STATUS_DEPALETTIZE; host->GPUNotifyTextureAttachment(entry->addr); } } @@ -258,8 +268,11 @@ void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualF // Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture. // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. bool clutSuccess = false; - if (((framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16))) { + if (((framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || + (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16))) { clutSuccess = AttachFramebufferCLUT(entry, framebuffer, address); + } else if (entry->format == GE_TFMT_CLUT8 || entry->format == GE_TFMT_CLUT4) { + ERROR_LOG_REPORT_ONCE(fourEightBit, G3D, "4 and 8-bit CLUT format not supported for framebuffers"); } if (!clutSuccess) { @@ -269,16 +282,20 @@ void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualF (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); // Is it at least the right stride? - if (framebuffer->fb_stride == entry->bufw && compatFormat) { - if (framebuffer->format != entry->format) { - WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); - // TODO: Use an FBO to translate the palette? - AttachFramebufferValid(entry, framebuffer); - } else if ((entry->addr - address) / entry->bufw < framebuffer->height) { - WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address); - // TODO: Keep track of the y offset. - // If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect. - AttachFramebufferInvalid(entry, framebuffer); + if (framebuffer->fb_stride == entry->bufw) { + if (compatFormat) { + if (framebuffer->format != entry->format) { + WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); + // TODO: Use an FBO to translate the palette? + AttachFramebufferValid(entry, framebuffer); + } else if ((entry->addr - address) / entry->bufw < framebuffer->height) { + WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address); + // TODO: Keep track of the y offset. + // If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect. + AttachFramebufferInvalid(entry, framebuffer); + } + } else { + WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with incompatible formats %d != %d at %08x", entry->format, framebuffer->format, address); } } } @@ -522,6 +539,11 @@ void TextureCache::UpdateSamplingParams(TexCacheEntry &entry, bool force) { bool sClamp = gstate.isTexCoordClampedS(); bool tClamp = gstate.isTexCoordClampedT(); + if (entry.status & TexCacheEntry::STATUS_TEXPARAM_DIRTY) { + entry.status &= ~TexCacheEntry::STATUS_TEXPARAM_DIRTY; + force = true; + } + bool noMip = (gstate.texlevel & 0xFFFFFF) == 0x000001 || (gstate.texlevel & 0xFFFFFF) == 0x100001 ; // Fix texlevel at 0 if (entry.maxLevel == 0) { @@ -605,7 +627,6 @@ void TextureCache::UpdateSamplingParams(TexCacheEntry &entry, bool force) { static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int numPixels) { const u32 *src = (const u32 *)srcBuf; u32 *dst = (u32 *)dstBuf; - // TODO: NEON. switch (dstFmt) { case GL_UNSIGNED_SHORT_4_4_4_4: { @@ -628,6 +649,7 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n int i = sseChunks * 8 / 2; #else int i = 0; + // TODO: NEON. #endif for (; i < (numPixels + 1) / 2; i++) { u32 c = src[i]; @@ -660,6 +682,7 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n int i = sseChunks * 8 / 2; #else int i = 0; + // TODO: NEON. #endif for (; i < (numPixels + 1) / 2; i++) { u32 c = src[i]; @@ -689,6 +712,7 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n int i = sseChunks * 8 / 2; #else int i = 0; + // TODO: NEON. #endif for (; i < (numPixels + 1) / 2; i++) { u32 c = src[i]; @@ -817,7 +841,8 @@ void TextureCache::UpdateCurrentClut() { clutAlphaLinear_ = false; break; } - // Alpha 0 doesn't matter. + // Alpha 0 doesn't matter. + // TODO: Well, depending on blend mode etc, it can actually matter, although unlikely. if (i != 0 && (clut[i] & 0xFFF0) != clutAlphaLinearColor_) { clutAlphaLinear_ = false; break; @@ -884,7 +909,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) { if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) { - GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBufConverted_); + GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBuf_); if (!entry->depalFBO) { entry->depalFBO = fbo_create(entry->framebuffer->renderWidth, entry->framebuffer->renderHeight, 1, false, FBO_8888); } @@ -916,6 +941,9 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { glActiveTexture(GL_TEXTURE0); framebufferManager_->BindFramebufferColor(entry->framebuffer); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + entry->status |= TexCacheEntry::STATUS_TEXPARAM_DIRTY; glDisable(GL_BLEND); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 69c7355259..d03f2df78b 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -77,7 +77,6 @@ public: // Only used by Qt UI? bool DecodeTexture(u8 *output, GPUgstate state); -private: // Wow this is starting to grow big. Soon need to start looking at resizing it. // Must stay a POD. struct TexCacheEntry { @@ -98,7 +97,8 @@ private: STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 15 frames in between.) STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail. STATUS_DEPALETTIZE = 0x40, - STATUS_DEPALETTIZE_DIRTY = 0x80 + STATUS_DEPALETTIZE_DIRTY = 0x80, + STATUS_TEXPARAM_DIRTY = 0x100 }; // Status, but int so we can zero initialize. @@ -144,6 +144,7 @@ private: bool Matches(u16 dim2, u8 format2, int maxLevel2); }; +private: void Decimate(); // Run this once per frame to get rid of old textures. void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 bytesPerPixel, u32 level); void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, GLuint dstFmt, int bufw); From e2a4e13e38e91fb755a6c4f3132dcb6bf1ad426f Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 5 Apr 2014 14:45:15 +0800 Subject: [PATCH 14/24] Small one , not a big deal --- GPU/GLES/DepalettizeShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 882ee59ade..2f6285f4a2 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -157,7 +157,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { if (shiftedMask & 0x1F) WRITE(p, " int r = int(index.r * 31.99);\n"); else WRITE(p, " int r = 0;\n"); if (shiftedMask & 0x3E0) WRITE(p, " int g = int(index.g * 31.99);\n"); else WRITE(p, " int g = 0;\n"); if (shiftedMask & 0x7C00) WRITE(p, " int b = int(index.b * 31.99);\n"); else WRITE(p, " int b = 0;\n"); - if (shiftedMask & 0xF800) WRITE(p, " int a = int(index.a);\n"); else WRITE(p, " int a = 0;\n"); + if (shiftedMask & 0x8000) WRITE(p, " int a = int(index.a);\n"); else WRITE(p, " int a = 0;\n"); WRITE(p, "int color = (a << 15) | (b << 10) | (g << 5) | (r);"); break; } From d90da45a76d3159585fab68a792711ef9b01b283 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 6 Apr 2014 11:09:41 +0200 Subject: [PATCH 15/24] On PC, OpenGL 3.3 is enough for the new path, don't need ES 3 full compat. --- GPU/GLES/DepalettizeShader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 2f6285f4a2..1241740608 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -89,7 +89,7 @@ static bool CheckShaderCompileSuccess(GLuint shader, const char *code) { DepalShaderCache::DepalShaderCache() { // Pre-build the vertex program - bool useGL3 = gl_extensions.GLES3; + bool useGL3 = gl_extensions.GLES3 || gl_extensions.VersionGEThan(3, 3); vertexShader_ = glCreateShader(GL_VERTEX_SHADER); glShaderSource(vertexShader_, 1, useGL3 ? &depalVShader300 : &depalVShader100, 0); @@ -328,7 +328,7 @@ void DepalShaderCache::Decimate() { GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) { u32 id = GenerateShaderID(pixelFormat); - bool useGL3 = gl_extensions.GLES3; + bool useGL3 = gl_extensions.GLES3 || gl_extensions.VersionGEThan(3, 3); auto shader = cache_.find(id); if (shader != cache_.end()) { From 608d43898de4efa92fa440f8f9c9f3f656eb3611 Mon Sep 17 00:00:00 2001 From: raven02 Date: Wed, 23 Apr 2014 20:19:44 +0800 Subject: [PATCH 16/24] Remove AlphaMaskHack --- GPU/GLES/StateMapping.cpp | 3 --- UI/GameSettingsScreen.cpp | 1 - 2 files changed, 4 deletions(-) diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index c89173fadf..47cd3a49c9 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -444,9 +444,6 @@ void TransformDrawEngine::ApplyDrawState(int prim) { amask = false; } } - if (g_Config.bAlphaMaskHack) { - amask = true; // Yes, this makes no sense, but it "fixes" the 3rd Birthday by popular demand. - } glstate.colorMask.set(rmask, gmask, bmask, amask); diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index a0ca036523..29d97b33f1 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -190,7 +190,6 @@ void GameSettingsScreen::CreateViews() { graphicsSettings->Add(new ItemHeader(gs->T("Hack Settings", "Hack Settings (these WILL cause glitches)"))); graphicsSettings->Add(new CheckBox(&g_Config.bTimerHack, gs->T("Timer Hack"))); graphicsSettings->Add(new CheckBox(&g_Config.bDisableStencilTest, gs->T("Disable Stencil Test"))); - graphicsSettings->Add(new CheckBox(&g_Config.bAlphaMaskHack, gs->T("Alpha Mask Hack (3rd Birthday)"))); graphicsSettings->Add(new CheckBox(&g_Config.bAlwaysDepthWrite, gs->T("Always Depth Write"))); CheckBox *prescale = graphicsSettings->Add(new CheckBox(&g_Config.bPrescaleUV, gs->T("Texture Coord Speedhack"))); if (PSP_IsInited()) From aa32bd6aa4cc35a5c2db528126786a765bd526ee Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 10 May 2014 09:14:29 -0700 Subject: [PATCH 17/24] Also disable the logic op when rendering FB w/clut. --- GPU/GLES/TextureCache.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index dcf8bb80bf..523e0e7cf1 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -951,6 +951,9 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { glDisable(GL_CULL_FACE); glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); +#if !defined(USING_GLES2) + glDisable(GL_LOGIC_OP); +#endif glViewport(0, 0, entry->framebuffer->renderWidth, entry->framebuffer->renderHeight); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 12, pos); From 7ee6546609604594e6c77883c2e2b1528485ed3a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 10 May 2014 12:33:33 -0700 Subject: [PATCH 18/24] Cache clut textures based on clut format. Since we would end up with a different palette (size, interpretation of bits, etc.) --- GPU/GLES/DepalettizeShader.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 1241740608..b8a579ac1e 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -284,26 +284,29 @@ u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) { } GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { - auto oldtex = texCache_.find(clutID); + GEPaletteFormat palFormat = gstate.getClutPaletteFormat(); + const u32 realClutID = clutID ^ palFormat; + + auto oldtex = texCache_.find(realClutID); if (oldtex != texCache_.end()) { return oldtex->second->texture; } - GEPaletteFormat palFormat = gstate.getClutPaletteFormat(); GLuint dstFmt = getClutDestFormat(palFormat); - + int texturePixels = palFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; + DepalTexture *tex = new DepalTexture(); glGenTextures(1, &tex->texture); glBindTexture(GL_TEXTURE_2D, tex->texture); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; - glTexImage2D(GL_TEXTURE_2D, 0, components, palFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512, 1, 0, components, dstFmt, (void *)rawClut); + glTexImage2D(GL_TEXTURE_2D, 0, components, texturePixels, 1, 0, components, dstFmt, (void *)rawClut); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - texCache_[clutID] = tex; + texCache_[realClutID] = tex; return tex->texture; } From 5ac6fae8bbea5dacead6e01a1b3a519b21f582d9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 10 May 2014 13:15:39 -0700 Subject: [PATCH 19/24] Validate framebuffer clut shaders on use. When their parameters are actually the ones we are going to use. --- GPU/GLES/TextureCache.cpp | 22 +++++++++------------- GPU/GLES/TextureCache.h | 1 - 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 523e0e7cf1..08e75fc192 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -231,17 +231,6 @@ inline void AttachFramebufferInvalid(T &entry, VirtualFramebuffer *framebuffer) } } -bool TextureCache::AttachFramebufferCLUT(TextureCache::TexCacheEntry *entry, VirtualFramebuffer *framebuffer, u32 address) { - GLuint program = depalShaderCache_->GetDepalettizeShader(framebuffer->format); - if (program) { - entry->framebuffer = framebuffer; - entry->invalidHint = -1; - entry->status |= TexCacheEntry::STATUS_DEPALETTIZE; - return true; - } - return false; -} - void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) { // If they match exactly, it's non-CLUT and from the top left. if (exactMatch) { @@ -270,7 +259,10 @@ void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualF bool clutSuccess = false; if (((framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16))) { - clutSuccess = AttachFramebufferCLUT(entry, framebuffer, address); + AttachFramebufferValid(entry, framebuffer); + entry->status |= TexCacheEntry::STATUS_DEPALETTIZE; + // We'll validate it later. + clutSuccess = true; } else if (entry->format == GE_TFMT_CLUT8 || entry->format == GE_TFMT_CLUT4) { ERROR_LOG_REPORT_ONCE(fourEightBit, G3D, "4 and 8-bit CLUT format not supported for framebuffers"); } @@ -908,7 +900,11 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { entry->framebuffer->usageFlags |= FB_USAGE_TEXTURE; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (useBufferedRendering) { + GLuint program = 0; if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) { + program = depalShaderCache_->GetDepalettizeShader(entry->framebuffer->format); + } + if (program) { GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBuf_); if (!entry->depalFBO) { entry->depalFBO = fbo_create(entry->framebuffer->renderWidth, entry->framebuffer->renderHeight, 1, false, FBO_8888); @@ -928,7 +924,6 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { }; static const GLubyte indices[4] = { 0, 1, 3, 2 }; - GLuint program = depalShaderCache_->GetDepalettizeShader(entry->framebuffer->format); glUseProgram(program); glBindBuffer(GL_ARRAY_BUFFER, 0); @@ -968,6 +963,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { glstate.Restore(); framebufferManager_->RebindFramebuffer(); } else { + entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; framebufferManager_->BindFramebufferColor(entry->framebuffer); } diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index d03f2df78b..2beea8295b 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -158,7 +158,6 @@ private: u32 GetCurrentClutHash(); void UpdateCurrentClut(); void AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch); - bool AttachFramebufferCLUT(TextureCache::TexCacheEntry *entry, VirtualFramebuffer *framebuffer, u32 address); void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); void SetTextureFramebuffer(TexCacheEntry *entry); From 126ed25a489d21e128160d5ebc530549cd86694a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 10 May 2014 16:00:33 -0700 Subject: [PATCH 20/24] Correct clut depal indexing. --- GPU/GLES/DepalettizeShader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index b8a579ac1e..3d0723d8eb 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -158,7 +158,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { if (shiftedMask & 0x3E0) WRITE(p, " int g = int(index.g * 31.99);\n"); else WRITE(p, " int g = 0;\n"); if (shiftedMask & 0x7C00) WRITE(p, " int b = int(index.b * 31.99);\n"); else WRITE(p, " int b = 0;\n"); if (shiftedMask & 0x8000) WRITE(p, " int a = int(index.a);\n"); else WRITE(p, " int a = 0;\n"); - WRITE(p, "int color = (a << 15) | (b << 10) | (g << 5) | (r);"); + WRITE(p, " int color = (a << 15) | (b << 10) | (g << 5) | (r);"); break; } float texturePixels = 256; @@ -166,7 +166,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { texturePixels = 512; WRITE(p, " color = ((color >> %i) & 0x%02x) | %i;\n", shift, mask, offset); // '|' matches what we have in gstate.h - WRITE(p, " fragColor0 = texture2D(pal, vec2((floor(float(color)) - 0.5) * (1.0 / %f), 0.0));\n", texturePixels); + WRITE(p, " fragColor0 = texture2D(pal, vec2((floor(float(color)) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels); WRITE(p, "}\n"); } From 5f99f663efa3fc469ce4da40f3f518bd840433a3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 26 May 2014 21:26:40 -0700 Subject: [PATCH 21/24] Avoid duplicating an FBO when depaletizing it. --- GPU/GLES/Framebuffer.cpp | 4 ++-- GPU/GLES/Framebuffer.h | 2 +- GPU/GLES/TextureCache.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 5344f05033..d1de28ef07 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1014,7 +1014,7 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf } } -void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { +void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy) { if (framebuffer == NULL) { framebuffer = currentRenderVfb_; } @@ -1027,7 +1027,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { // currentRenderVfb_ will always be set when this is called, except from the GE debugger. // Let's just not bother with the copy in that case. - if (currentRenderVfb_ && MaskedEqual(framebuffer->fb_address, gstate.getFrameBufRawAddress())) { + if (!skipCopy && currentRenderVfb_ && MaskedEqual(framebuffer->fb_address, gstate.getFrameBufRawAddress())) { #ifndef USING_GLES2 if (gl_extensions.FBO_ARB) { bool useNV = false; diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index cb9e1b22ad..26eee974e4 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -162,7 +162,7 @@ public: void BindFramebufferDepth(VirtualFramebuffer *sourceframebuffer, VirtualFramebuffer *targetframebuffer); // For use when texturing from a framebuffer. May create a duplicate if target. - void BindFramebufferColor(VirtualFramebuffer *framebuffer); + void BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy = false); // Returns true if it's sure this is a direct FBO->FBO transfer and it has already handle it. // In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 08e75fc192..3b809e4bcb 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -935,7 +935,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { glBindTexture(GL_TEXTURE_2D, clutTexture); glActiveTexture(GL_TEXTURE0); - framebufferManager_->BindFramebufferColor(entry->framebuffer); + framebufferManager_->BindFramebufferColor(entry->framebuffer, true); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); entry->status |= TexCacheEntry::STATUS_TEXPARAM_DIRTY; From 14c081904c8ae1ca735db6eaadd8bdb737c12f4b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 26 May 2014 21:28:44 -0700 Subject: [PATCH 22/24] 5650 is also simple alpha. --- GPU/GLES/TextureCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 3b809e4bcb..63d8c3368d 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -975,7 +975,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { gstate_c.curTextureHeight = entry->framebuffer->height; gstate_c.flipTexture = true; gstate_c.textureFullAlpha = entry->framebuffer->format == GE_FORMAT_565; - gstate_c.textureSimpleAlpha = false; + gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha; UpdateSamplingParams(*entry, true); } else { if (entry->framebuffer->fbo) From 29a9ff369ef06ea1a5a441584fdf8769ef4ae9b5 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 26 May 2014 21:39:59 -0700 Subject: [PATCH 23/24] Make a note when we need to reset the shader. Hmm, not sure this is the cleanest way. --- Core/HW/MediaEngine.cpp | 2 +- GPU/GLES/ShaderManager.cpp | 3 ++- GPU/GLES/TextureCache.cpp | 1 + GPU/GPUState.h | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Core/HW/MediaEngine.cpp b/Core/HW/MediaEngine.cpp index 3929adddf4..57a9ec4dc2 100644 --- a/Core/HW/MediaEngine.cpp +++ b/Core/HW/MediaEngine.cpp @@ -739,7 +739,7 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid break; default: - ERROR_LOG(ME, "Unsupported video pixel format %d", videoPixelMode); + ERROR_LOG_REPORT(ME, "Unsupported video pixel format %d", videoPixelMode); break; } return videoImageSize; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 8872960182..3d1ecf7db8 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -684,10 +684,11 @@ Shader *ShaderManager::ApplyVertexShader(int prim, u32 vertType) { LinkedShader *ShaderManager::ApplyFragmentShader(Shader *vs, int prim, u32 vertType) { FragmentShaderID FSID; ComputeFragmentShaderID(&FSID); - if (lastVShaderSame_ && FSID == lastFSID_) { + if (lastVShaderSame_ && FSID == lastFSID_ && !gstate_c.shaderChanged) { lastShader_->UpdateUniforms(vertType); return lastShader_; } + gstate_c.shaderChanged = false; lastFSID_ = FSID; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 63d8c3368d..427025b67a 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -925,6 +925,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) { static const GLubyte indices[4] = { 0, 1, 3, 2 }; glUseProgram(program); + gstate_c.shaderChanged = true; glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index b8e79488e7..ca8b668ca1 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -453,6 +453,8 @@ struct GPUStateCache bool textureSimpleAlpha; bool vertexFullAlpha; bool framebufChanged; + // Doesn't need savestating. + bool shaderChanged; int skipDrawReason; From 44d9af922238a604d3893b0b4ae2b831361c1e59 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 27 May 2014 22:22:56 +0200 Subject: [PATCH 24/24] Hopefully fix the Manhunt red/green swap --- GPU/GLES/DepalettizeShader.cpp | 11 ++++++++++- GPU/GLES/Framebuffer.cpp | 8 -------- GPU/GLES/TextureCache.cpp | 8 -------- GPU/GLES/TextureCache.h | 14 ++++++++++++-- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index 3d0723d8eb..6040ec86bd 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -295,11 +295,20 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) { GLuint dstFmt = getClutDestFormat(palFormat); int texturePixels = palFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; + bool useBGRA = UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE; + DepalTexture *tex = new DepalTexture(); glGenTextures(1, &tex->texture); glBindTexture(GL_TEXTURE_2D, tex->texture); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; - glTexImage2D(GL_TEXTURE_2D, 0, components, texturePixels, 1, 0, components, dstFmt, (void *)rawClut); + + GLuint components2 = components; +#if defined(MAY_HAVE_GLES3) + if (useBGRA) { + components2 = GL_BGRA_EXT; + } +#endif + glTexImage2D(GL_TEXTURE_2D, 0, components, texturePixels, 1, 0, components2, dstFmt, (void *)rawClut); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index d1de28ef07..a20751183a 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1405,14 +1405,6 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int fbo_unbind(); } -static inline bool UseBGRA8888() { - // TODO: Other platforms? May depend on vendor which is faster? -#ifdef _WIN32 - return gl_extensions.EXT_bgra; -#endif - return false; -} - // TODO: SSE/NEON // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 height, GEBufferFormat format) { diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 427025b67a..958be67070 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -58,14 +58,6 @@ extern int g_iNumVideos; -static inline bool UseBGRA8888() { - // TODO: Other platforms? May depend on vendor which is faster? -#ifdef _WIN32 - return gl_extensions.EXT_bgra; -#endif - return false; -} - TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) { lastBoundTexture = -1; decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL; diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 2beea8295b..8ee87e5d6d 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -17,11 +17,13 @@ #pragma once -#include "../Globals.h" #include "gfx_es2/fbo.h" +#include "gfx_es2/gpu_features.h" + +#include "Globals.h" #include "GPU/GPUInterface.h" #include "GPU/GPUState.h" -#include "TextureScaler.h" +#include "GPU/GLES/TextureScaler.h" struct VirtualFramebuffer; class FramebufferManager; @@ -40,6 +42,14 @@ enum FramebufferNotification { NOTIFY_FB_DESTROYED, }; +inline bool UseBGRA8888() { + // TODO: Other platforms? May depend on vendor which is faster? +#ifdef _WIN32 + return gl_extensions.EXT_bgra; +#endif + return false; +} + class TextureCache { public: TextureCache();