diff --git a/Core/Config.cpp b/Core/Config.cpp index df77f7ba2f..5ec26364f4 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -432,7 +432,9 @@ static ConfigSetting graphicsSettings[] = { ConfigSetting("FrameSkipUnthrottle", &g_Config.bFrameSkipUnthrottle, true), #endif ReportedConfigSetting("ForceMaxEmulatedFPS", &g_Config.iForceMaxEmulatedFPS, 60, true, true), -#ifdef USING_GLES2 + + // TODO: Hm, on fast mobile GPUs we should definitely default to at least 4... +#ifdef MOBILE_DEVICE ConfigSetting("AnisotropyLevel", &g_Config.iAnisotropyLevel, 0, true, true), #else ConfigSetting("AnisotropyLevel", &g_Config.iAnisotropyLevel, 8, true, true), diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 36bd191a97..a6e52bdb4a 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -17,6 +17,7 @@ #include +#include "gfx_es2/gl_state.h" #include "Common/Log.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" @@ -28,12 +29,12 @@ // Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well. void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) { char *p = buffer; -#ifdef USING_GLES2 - WRITE(p, "#version 300 es\n"); - WRITE(p, "precision mediump float;\n"); -#else - WRITE(p, "#version 330\n"); -#endif + if (gl_extensions.IsGLES) { + WRITE(p, "#version 300 es\n"); + WRITE(p, "precision mediump float;\n"); + } else { + WRITE(p, "#version 330\n"); + } WRITE(p, "in vec2 v_texcoord0;\n"); WRITE(p, "out vec4 fragColor0;\n"); WRITE(p, "uniform sampler2D tex;\n"); @@ -218,12 +219,12 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa sprintf(offset, " + %f", texel_offset); if (lang == GLSL_140) { -#ifdef USING_GLES2 - WRITE(p, "#version 100\n"); - WRITE(p, "precision mediump float;\n"); -#else - WRITE(p, "#version 110\n"); -#endif + if (gl_extensions.IsGLES) { + WRITE(p, "#version 100\n"); + WRITE(p, "precision mediump float;\n"); + } else { + WRITE(p, "#version 110\n"); + } WRITE(p, "varying vec2 v_texcoord0;\n"); WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D pal;\n"); diff --git a/GPU/Common/PostShader.cpp b/GPU/Common/PostShader.cpp index f17d825f3f..1c95298716 100644 --- a/GPU/Common/PostShader.cpp +++ b/GPU/Common/PostShader.cpp @@ -25,6 +25,7 @@ #include "file/ini_file.h" #include "file/file_util.h" #include "file/vfs.h" +#include "gfx_es2/gpu_features.h" #include "Core/Config.h" #include "GPU/Common/PostShader.h" @@ -84,13 +85,14 @@ void LoadPostShaderInfo(std::vector directories) { info.vertexShaderFile = path + "/" + temp; section.Get("OutputResolution", &info.outputResolution, false); -#ifdef USING_GLES2 - // Let's ignore shaders we can't support. TODO: Check for GLES 3.0 - bool requiresIntegerSupport; - section.Get("RequiresIntSupport", &requiresIntegerSupport, false); - if (requiresIntegerSupport) - continue; -#endif + // Let's ignore shaders we can't support. 
TODO: Not a very good check + if (gl_extensions.IsGLES && !gl_extensions.GLES3) { + bool requiresIntegerSupport; + section.Get("RequiresIntSupport", &requiresIntegerSupport, false); + if (requiresIntegerSupport) + continue; + } + auto beginErase = std::find(shaderInfo.begin(), shaderInfo.end(), info.name); if (beginErase != shaderInfo.end()) { shaderInfo.erase(beginErase, shaderInfo.end()); diff --git a/GPU/Directx9/helper/dx_state.h b/GPU/Directx9/helper/dx_state.h index 9217c7a9c6..e1f6845e18 100644 --- a/GPU/Directx9/helper/dx_state.h +++ b/GPU/Directx9/helper/dx_state.h @@ -408,7 +408,6 @@ struct GLExtensions { bool OES_depth_texture; bool EXT_discard_framebuffer; bool FBO_ARB; - bool FBO_EXT; }; extern GLExtensions gl_extensions; diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index f44a282513..858a200ca6 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -16,8 +16,9 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #if !defined(USING_GLES2) -// SDL 1.2 on Apple does not have support for OpenGL 3 and hence needs -// special treatment in the shader generator. +// We do not yet enable OpenGL 3 on Apple, so we need +// special treatment in the shader generator. However, the GL version check +// should be enough? TODO #if defined(__APPLE__) #define FORCE_OPENGL_2_0 #endif @@ -156,7 +157,7 @@ ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) { if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) { return REPLACE_ALPHA_YES; } else { - if (gl_extensions.ARB_blend_func_extended) { + if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) { return REPLACE_ALPHA_DUALSOURCE; } else { return REPLACE_ALPHA_NO; @@ -253,7 +254,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend) { case GE_BLENDMODE_MIN: case GE_BLENDMODE_MAX: - if (gl_extensions.EXT_blend_minmax || gl_extensions.GLES3) { + if (gstate_c.Supports(GPU_SUPPORTS_BLEND_MINMAX)) { return REPLACE_BLEND_STANDARD; } else { return !allowShaderBlend ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO; @@ -285,7 +286,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend) { case GE_DSTBLEND_DOUBLEINVSRCALPHA: // We can't technically do this correctly (due to clamping) without reading the dst color. // Using a copy isn't accurate either, though, when there's overlap. - if (gl_extensions.ANY_shader_framebuffer_fetch) + if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) return !allowShaderBlend ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO; return REPLACE_BLEND_PRE_SRC_2X_ALPHA; @@ -344,14 +345,14 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend) { case GE_DSTBLEND_DOUBLEINVSRCALPHA: if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) { // Can't safely double alpha, will clamp. However, a copy may easily be worse due to overlap. - if (gl_extensions.ANY_shader_framebuffer_fetch) + if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) return !allowShaderBlend ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO; return REPLACE_BLEND_PRE_SRC_2X_ALPHA; } else { // This means dst alpha/color is used in the src factor. // Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?) // We will just hope that doubling alpha for the dst factor will not clamp too badly. 
- if (gl_extensions.ANY_shader_framebuffer_fetch) + if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) return !allowShaderBlend ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO; return REPLACE_BLEND_2X_ALPHA; } @@ -373,8 +374,7 @@ enum LogicOpReplaceType { }; static inline LogicOpReplaceType ReplaceLogicOpType() { -#if defined(USING_GLES2) - if (gstate.isLogicOpEnabled()) { + if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP) && gstate.isLogicOpEnabled()) { switch (gstate.getLogicOp()) { case GE_LOGIC_COPY_INVERTED: case GE_LOGIC_AND_INVERTED: @@ -391,7 +391,6 @@ static inline LogicOpReplaceType ReplaceLogicOpType() { return LOGICOPTYPE_NORMAL; } } -#endif return LOGICOPTYPE_NORMAL; } @@ -456,7 +455,7 @@ void ComputeFragmentShaderID(ShaderID *id) { id0 |= (enableColorDoubling & 1) << 23; // 2 bits id0 |= (stencilToAlpha) << 24; - + if (stencilToAlpha != REPLACE_ALPHA_NO) { // 4 bits id0 |= ReplaceAlphaWithStencilType() << 26; @@ -501,86 +500,78 @@ void GenerateFragmentShader(char *buffer) { bool bitwiseOps = false; const char *lastFragData = nullptr; -#if defined(USING_GLES2) - // Let's wait until we have a real use for this. - // ES doesn't support dual source alpha :( - if (gl_extensions.GLES3) { - WRITE(p, "#version 300 es\n"); // GLSL ES 3.0 - fragColor0 = "fragColor0"; - texture = "texture"; - glslES30 = true; - bitwiseOps = true; - texelFetch = "texelFetch"; - } else { - WRITE(p, "#version 100\n"); // GLSL ES 1.0 - if (gl_extensions.EXT_gpu_shader4) { - WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); + if (gl_extensions.IsGLES) { + // ES doesn't support dual source alpha :( + if (gstate_c.featureFlags & GPU_SUPPORTS_GLSL_ES_300) { + WRITE(p, "#version 300 es\n"); // GLSL ES 3.0 + fragColor0 = "fragColor0"; + texture = "texture"; + glslES30 = true; bitwiseOps = true; - texelFetch = "texelFetch2D"; + texelFetch = "texelFetch"; + } else { + WRITE(p, "#version 100\n"); // GLSL ES 1.0 + if (gl_extensions.EXT_gpu_shader4) { + WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); + bitwiseOps = true; + texelFetch = "texelFetch2D"; + } } - } - // PowerVR needs highp to do the fog in MHU correctly. - // Others don't, and some can't handle highp in the fragment shader. - highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; - highpTexcoord = highpFog; + // PowerVR needs highp to do the fog in MHU correctly. + // Others don't, and some can't handle highp in the fragment shader. + highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; + highpTexcoord = highpFog; - if (gl_extensions.EXT_shader_framebuffer_fetch) { - WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n"); - lastFragData = "gl_LastFragData[0]"; - } else if (gl_extensions.NV_shader_framebuffer_fetch) { - // GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop. 
- WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n"); - lastFragData = "gl_LastFragData[0]"; - } else if (gl_extensions.ARM_shader_framebuffer_fetch) { - WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n"); - lastFragData = "gl_LastFragColorARM"; - } - - WRITE(p, "precision lowp float;\n"); - -#elif !defined(FORCE_OPENGL_2_0) - if (gl_extensions.VersionGEThan(3, 3, 0)) { - fragColor0 = "fragColor0"; - texture = "texture"; - glslES30 = true; - bitwiseOps = true; - texelFetch = "texelFetch"; - WRITE(p, "#version 330\n"); - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); - } else if (gl_extensions.VersionGEThan(3, 0, 0)) { - fragColor0 = "fragColor0"; - bitwiseOps = true; - texelFetch = "texelFetch"; - WRITE(p, "#version 130\n"); - if (gl_extensions.EXT_gpu_shader4) { - WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); + if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) { + if (gl_extensions.EXT_shader_framebuffer_fetch) { + WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n"); + lastFragData = "gl_LastFragData[0]"; + } else if (gl_extensions.NV_shader_framebuffer_fetch) { + // GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop. + WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n"); + lastFragData = "gl_LastFragData[0]"; + } else if (gl_extensions.ARM_shader_framebuffer_fetch) { + WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n"); + lastFragData = "gl_LastFragColorARM"; + } } - // Remove lowp/mediump in non-mobile non-glsl 3 implementations - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); + + WRITE(p, "precision lowp float;\n"); } else { - WRITE(p, "#version 110\n"); - if (gl_extensions.EXT_gpu_shader4) { - WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); + // TODO: Handle this in VersionGEThan? +#if !defined(FORCE_OPENGL_2_0) + if (gl_extensions.VersionGEThan(3, 3, 0)) { + fragColor0 = "fragColor0"; + texture = "texture"; + glslES30 = true; bitwiseOps = true; - texelFetch = "texelFetch2D"; + texelFetch = "texelFetch"; + WRITE(p, "#version 330\n"); + } else if (gl_extensions.VersionGEThan(3, 0, 0)) { + fragColor0 = "fragColor0"; + bitwiseOps = true; + texelFetch = "texelFetch"; + WRITE(p, "#version 130\n"); + if (gl_extensions.EXT_gpu_shader4) { + WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); + } + } else { + WRITE(p, "#version 110\n"); + if (gl_extensions.EXT_gpu_shader4) { + WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); + bitwiseOps = true; + texelFetch = "texelFetch2D"; + } } - // Remove lowp/mediump in non-mobile non-glsl 3 implementations - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); - } -#else - // Need to remove lowp/mediump for Mac - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); #endif + // We remove these everywhere - GL4, GL3, Mac-forced-GL2, etc. 
+ WRITE(p, "#define lowp\n"); + WRITE(p, "#define mediump\n"); + WRITE(p, "#define highp\n"); + } + if (glslES30) { varying = "in"; } @@ -611,7 +602,7 @@ void GenerateFragmentShader(char *buffer) { if (doTexture) WRITE(p, "uniform sampler2D tex;\n"); if (!gstate.isModeClear() && replaceBlend > REPLACE_BLEND_STANDARD) { - if (!gl_extensions.ANY_shader_framebuffer_fetch && replaceBlend == REPLACE_BLEND_COPY_FBO) { + if (!(gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) && replaceBlend == REPLACE_BLEND_COPY_FBO) { if (!texelFetch) { WRITE(p, "uniform vec2 u_fbotexSize;\n"); } @@ -962,7 +953,7 @@ void GenerateFragmentShader(char *buffer) { if (replaceBlend == REPLACE_BLEND_COPY_FBO) { // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. // We can just read the prev value more directly. - if (gl_extensions.ANY_shader_framebuffer_fetch) { + if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) { WRITE(p, " lowp vec4 destColor = %s;\n", lastFragData); } else if (!texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", texture); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 1908a25682..8164547c5c 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -48,22 +48,6 @@ #include "UI/OnScreenDisplay.h" -#if defined(USING_GLES2) -#ifndef GL_READ_FRAMEBUFFER -#define GL_READ_FRAMEBUFFER GL_FRAMEBUFFER -#define GL_DRAW_FRAMEBUFFER GL_FRAMEBUFFER -#endif -#ifndef GL_RGBA8 -#define GL_RGBA8 GL_RGBA -#endif -#ifndef GL_DEPTH_COMPONENT24 -#define GL_DEPTH_COMPONENT24 GL_DEPTH_COMPONENT24_OES -#endif -#ifndef GL_DEPTH24_STENCIL8_OES -#define GL_DEPTH24_STENCIL8_OES 0x88F0 -#endif -#endif - extern int g_iNumVideos; static const char tex_fs[] = @@ -284,12 +268,9 @@ FramebufferManager::FramebufferManager() : usePostShader_(false), postShaderAtOutputResolution_(false), resized_(false), - gameUsesSequentialCopies_(false) -#ifndef USING_GLES2 - , + gameUsesSequentialCopies_(false), pixelBufObj_(nullptr), currentPBO_(0) -#endif { } @@ -312,9 +293,7 @@ FramebufferManager::~FramebufferManager() { fbo_destroy(it->second.fbo); } -#ifndef USING_GLES2 delete [] pixelBufObj_; -#endif delete [] convBuf_; } @@ -520,20 +499,7 @@ void FramebufferManager::DrawActiveTexture(GLuint texture, float x, float y, flo } if (texture) { - // We know the texture, we can do a DrawTexture shortcut on nvidia. -#if defined(ANDROID) - // Don't remember why I disabled this - no win? - if (false && gl_extensions.NV_draw_texture && !program) { - // Fast path for Tegra. TODO: Make this path work on desktop nvidia, seems GLEW doesn't have a clue. - // Actually, on Desktop we should just use glBlitFramebuffer - although we take a texture here - // so that's a little gnarly, will have to modify all callers. - glDrawTextureNV(texture, 0, - x, y, w, h, 0.0f, - u0, v1, u1, v0); - return; - } -#endif - + // Previously had NVDrawTexture fallback here but wasn't worth it. glBindTexture(GL_TEXTURE_2D, texture); } @@ -739,15 +705,15 @@ void FramebufferManager::NotifyRenderFramebufferSwitched(VirtualFramebuffer *pre } textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); -#ifdef USING_GLES2 - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. 
This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. - if (vfb->last_frame_render != gpuStats.numFlips) { - ClearBuffer(); + if (gl_extensions.IsGLES) { + // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering + // to it. This broke stuff before, so now it only clears on the first use of an + // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs + // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. + if (vfb->last_frame_render != gpuStats.numFlips) { + ClearBuffer(); + } } -#endif // Copy depth pixel value from the read framebuffer to the draw framebuffer if (prevVfb && !g_Config.bDisableSlowFramebufEffects) { @@ -831,24 +797,21 @@ void FramebufferManager::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFr src->renderWidth == dst->renderWidth && src->renderHeight == dst->renderHeight) { -#ifndef USING_GLES2 - if (gl_extensions.FBO_ARB) { - bool useNV = false; -#else - if (gl_extensions.GLES3 || gl_extensions.NV_framebuffer_blit) { - bool useNV = !gl_extensions.GLES3; -#endif + if (gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT)) { + // Only use NV if ARB isn't supported. + bool useNV = !gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT); // Let's only do this if not clearing depth. fbo_bind_for_read(src->fbo); glDisable(GL_SCISSOR_TEST); -#if defined(USING_GLES2) && defined(ANDROID) // We only support this extension on Android, it's not even available on PC. if (useNV) { +#if defined(USING_GLES2) && defined(ANDROID) // We only support this extension on Android, it's not even available on PC. glBlitFramebufferNV(0, 0, src->renderWidth, src->renderHeight, 0, 0, dst->renderWidth, dst->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST); - } else #endif // defined(USING_GLES2) && defined(ANDROID) + } else { glBlitFramebuffer(0, 0, src->renderWidth, src->renderHeight, 0, 0, dst->renderWidth, dst->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST); + } // If we set dst->depthUpdated here, our optimization above would be pointless. glstate.scissorTest.restore(); @@ -1128,24 +1091,6 @@ void FramebufferManager::CopyDisplayToOutput() { } } -inline bool FramebufferManager::ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const { -#ifndef USING_GLES2 - bool useCPU = g_Config.iRenderingMode == FB_READFBOMEMORY_CPU; - // We might get here if hackForce04154000Download_ is hit. - // Some cards or drivers seem to always dither when downloading a framebuffer to 16-bit. - // This causes glitches in games that expect the exact values. - // It has not been experienced on NVIDIA cards, so those are left using the GPU (which is faster.) 
- if (g_Config.iRenderingMode == FB_BUFFERED_MODE) { - if (gl_extensions.gpuVendor != GPU_VENDOR_NVIDIA || gl_extensions.ver[0] < 3) { - useCPU = true; - } - } - return useCPU; -#else - return true; -#endif -} - void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { PROFILE_THIS_SCOPE("gpu-readback"); #ifndef USING_GLES2 @@ -1212,7 +1157,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s nvfb->colorDepth = FBO_8888; break; } - if (ShouldDownloadUsingCPU(vfb)) { + if (gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD)) { nvfb->colorDepth = vfb->colorDepth; } @@ -1281,7 +1226,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s #ifdef USING_GLES2 PackFramebufferSync_(nvfb, x, y, w, h); #else - if (gl_extensions.PBO_ARB && gl_extensions.OES_texture_npot) { + if (gl_extensions.ARB_pixel_buffer_object && gl_extensions.OES_texture_npot) { if (!sync) { PackFramebufferAsync_(nvfb); } else { @@ -1305,20 +1250,8 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int fbo_bind_as_render_target(dst->fbo); glDisable(GL_SCISSOR_TEST); - bool useBlit = false; - bool useNV = false; - -#ifndef USING_GLES2 - if (gl_extensions.FBO_ARB) { - useNV = false; - useBlit = true; - } -#else - if (gl_extensions.GLES3 || gl_extensions.NV_framebuffer_blit) { - useNV = !gl_extensions.GLES3; - useBlit = true; - } -#endif + bool useBlit = gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT); + bool useNV = useBlit && !gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT); float srcXFactor = useBlit ? (float)src->renderWidth / (float)src->bufferWidth : 1.0f; float srcYFactor = useBlit ? (float)src->renderHeight / (float)src->bufferHeight : 1.0f; @@ -1497,7 +1430,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) { GLubyte *packed = 0; bool unbind = false; const u8 nextPBO = (currentPBO_ + 1) % MAX_PBO; - const bool useCPU = ShouldDownloadUsingCPU(vfb); + const bool useCPU = gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD); // We'll prepare two PBOs to switch between readying and reading if (!pixelBufObj_) { @@ -1541,7 +1474,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) { if (vfb) { int pixelType, pixelSize, pixelFormat, align; - bool reverseOrder = (gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD); + bool reverseOrder = gstate_c.Supports(GPU_PREFER_REVERSE_COLOR_ORDER); switch (vfb->format) { // GL_UNSIGNED_INT_8_8_8_8 returns A B G R (little-endian, tested in Nvidia card/x86 PC) // GL_UNSIGNED_BYTE returns R G B A in consecutive bytes ("big-endian"/not treated as 32-bit value) @@ -1586,15 +1519,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) { } GLenum fbStatus; -#ifndef USING_GLES2 - if (!gl_extensions.FBO_ARB) { - fbStatus = glCheckFramebufferStatusEXT(GL_READ_FRAMEBUFFER); - } else { - fbStatus = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER); - } -#else - fbStatus = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER); -#endif + fbStatus = (GLenum)fbo_check_framebuffer_status(vfb->fbo); if (fbStatus != GL_FRAMEBUFFER_COMPLETE) { ERROR_LOG(SCEGE, "Incomplete source framebuffer, aborting read"); @@ -1877,12 +1802,11 @@ bool FramebufferManager::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferF buffer.Allocate(vfb->renderWidth, vfb->renderHeight, GE_FORMAT_8888, true, true); if (vfb->fbo) fbo_bind_for_read(vfb->fbo); 
-#ifndef USING_GLES2 - glReadBuffer(GL_COLOR_ATTACHMENT0); -#endif + if (gl_extensions.GLES3 || !gl_extensions.IsGLES) + glReadBuffer(GL_COLOR_ATTACHMENT0); + glPixelStorei(GL_PACK_ALIGNMENT, 4); glReadPixels(0, 0, vfb->renderWidth, vfb->renderHeight, GL_RGBA, GL_UNSIGNED_BYTE, buffer.GetData()); - return true; } @@ -1906,18 +1830,15 @@ bool FramebufferManager::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_add return true; } -#ifndef USING_GLES2 buffer.Allocate(vfb->renderWidth, vfb->renderHeight, GPU_DBG_FORMAT_FLOAT, true); if (vfb->fbo) fbo_bind_for_read(vfb->fbo); - glReadBuffer(GL_DEPTH_ATTACHMENT); + if (gl_extensions.GLES3 || !gl_extensions.IsGLES) + glReadBuffer(GL_DEPTH_ATTACHMENT); glPixelStorei(GL_PACK_ALIGNMENT, 4); glReadPixels(0, 0, vfb->renderWidth, vfb->renderHeight, GL_DEPTH_COMPONENT, GL_FLOAT, buffer.GetData()); return true; -#else - return false; -#endif } bool FramebufferManager::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 27b784807f..341f4b19bc 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -38,7 +38,6 @@ class TextureCache; class TransformDrawEngine; class ShaderManager; -#ifndef USING_GLES2 // Simple struct for asynchronous PBO readbacks struct AsyncPBO { GLuint handle; @@ -52,8 +51,6 @@ struct AsyncPBO { bool reading; }; -#endif - struct CardboardSettings { bool enabled; float leftEyeXPosition; @@ -147,9 +144,7 @@ private: inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const; -#ifndef USING_GLES2 - void PackFramebufferAsync_(VirtualFramebuffer *vfb); -#endif + void PackFramebufferAsync_(VirtualFramebuffer *vfb); // Not used under ES currently void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h); // Used by DrawPixels @@ -187,8 +182,7 @@ private: std::vector bvfbs_; // blitting framebuffers (for download) std::map tempFBOs_; -#ifndef USING_GLES2 + // Not used under ES currently. AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_; -#endif }; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 41a2616281..52686ef8e1 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -21,6 +21,7 @@ #include "Common/ChunkFile.h" +#include "Core/Config.h" #include "Core/Debugger/Breakpoints.h" #include "Core/MemMapHelpers.h" #include "Core/Host.h" @@ -31,6 +32,7 @@ #include "GPU/GPUState.h" #include "GPU/ge_constants.h" #include "GPU/GeDisasm.h" +#include "GPU/Common/FramebufferCommon.h" #include "GPU/GLES/ShaderManager.h" #include "GPU/GLES/GLES_GPU.h" @@ -396,6 +398,7 @@ GLES_GPU::CommandInfo GLES_GPU::cmdInfo_[256]; GLES_GPU::GLES_GPU() : resized_(false) { UpdateVsyncInterval(true); + CheckGPUFeatures(); shaderManager_ = new ShaderManager(); transformDraw_.SetShaderManager(shaderManager_); @@ -463,6 +466,94 @@ GLES_GPU::~GLES_GPU() { glstate.SetVSyncInterval(0); } +// Take the raw GL extension and versioning data and turn into feature flags. 
+void GLES_GPU::CheckGPUFeatures() {
+	u32 features = 0;
+	if (gl_extensions.ARB_blend_func_extended /*|| gl_extensions.EXT_blend_func_extended*/) {
+		if (gl_extensions.gpuVendor == GPU_VENDOR_INTEL || !gl_extensions.VersionGEThan(3, 0, 0)) {
+			// Don't use this extension on sub 3.0 OpenGL versions as it does not seem reliable.
+			// Also on Intel, see https://github.com/hrydgard/ppsspp/issues/4867
+		} else {
+			features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
+		}
+	}
+
+	if (gl_extensions.IsGLES) {
+		if (gl_extensions.GLES3)
+			features |= GPU_SUPPORTS_GLSL_ES_300;
+	} else {
+		if (gl_extensions.VersionGEThan(3, 3, 0))
+			features |= GPU_SUPPORTS_GLSL_330;
+	}
+
+	// Framebuffer fetch appears to be buggy at least on Tegra 3 devices. So we blacklist it.
+	// Tales of Destiny 2 has been reported to display green.
+	if (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.NV_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch) {
+		features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
+		// Blacklist Tegra 3, doesn't work very well.
+		if (strstr(gl_extensions.model, "NVIDIA Tegra 3") != 0) {
+			features &= ~GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
+		}
+	}
+
+	if (gl_extensions.ARB_framebuffer_object || gl_extensions.EXT_framebuffer_object || gl_extensions.IsGLES) {
+		features |= GPU_SUPPORTS_FBO;
+	}
+	if (gl_extensions.ARB_framebuffer_object || gl_extensions.GLES3) {
+		features |= GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT;
+	}
+	if (gl_extensions.NV_framebuffer_blit) {
+		features |= GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT;
+	}
+
+	bool useCPU = false;
+	if (!gl_extensions.IsGLES) {
+		// Urrgh, we don't even define FB_READFBOMEMORY_CPU on mobile
+#ifndef USING_GLES2
+		useCPU = g_Config.iRenderingMode == FB_READFBOMEMORY_CPU;
+#endif
+		// Some cards or drivers seem to always dither when downloading a framebuffer to 16-bit.
+		// This causes glitches in games that expect the exact values.
+		// It has not been experienced on NVIDIA cards, so those are left using the GPU (which is faster.)
+		if (g_Config.iRenderingMode == FB_BUFFERED_MODE) {
+			if (gl_extensions.gpuVendor != GPU_VENDOR_NVIDIA || gl_extensions.ver[0] < 3) {
+				useCPU = true;
+			}
+		}
+	} else {
+		useCPU = true;
+	}
+
+	if (useCPU)
+		features |= GPU_PREFER_CPU_DOWNLOAD;
+
+	if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD))
+		features |= GPU_PREFER_REVERSE_COLOR_ORDER;
+
+	if (gl_extensions.OES_texture_npot)
+		features |= GPU_SUPPORTS_OES_TEXTURE_NPOT;
+
+	if (gl_extensions.EXT_unpack_subimage || !gl_extensions.IsGLES)
+		features |= GPU_SUPPORTS_UNPACK_SUBIMAGE;
+
+	if (gl_extensions.EXT_blend_minmax || gl_extensions.GLES3)
+		features |= GPU_SUPPORTS_BLEND_MINMAX;
+
+	if (!gl_extensions.IsGLES)
+		features |= GPU_SUPPORTS_LOGIC_OP;
+
+	if (gl_extensions.GLES3 || !gl_extensions.IsGLES) {
+		features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
+	}
+
+#ifdef MOBILE_DEVICE
+	// Arguably, we should turn off GPU_IS_MOBILE on modern Tegras, etc.
+	features |= GPU_IS_MOBILE;
+#endif
+
+	gstate_c.featureFlags = features;
+}
+
 // Let's avoid passing nulls into snprintf().
static const char *GetGLStringAlways(GLenum name) { const GLubyte *value = glGetString(name); @@ -589,6 +680,7 @@ void GLES_GPU::UpdateCmdInfo() { void GLES_GPU::BeginFrameInternal() { if (resized_) { + CheckGPUFeatures(); UpdateCmdInfo(); transformDraw_.Resized(); } diff --git a/GPU/GLES/GLES_GPU.h b/GPU/GLES/GLES_GPU.h index 052b409e96..b2a97684e1 100644 --- a/GPU/GLES/GLES_GPU.h +++ b/GPU/GLES/GLES_GPU.h @@ -36,6 +36,10 @@ class GLES_GPU : public GPUCommon { public: GLES_GPU(); ~GLES_GPU(); + + // This gets called on startup and when we get back from settings. + void CheckGPUFeatures(); + void InitClear() override; void Reinitialize() override; void PreExecuteOp(u32 op, u32 diff) override; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 3216572d45..dc86026fec 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -103,7 +103,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans glBindAttribLocation(program, ATTR_COLOR1, "color1"); #ifndef USING_GLES2 - if (gl_extensions.ARB_blend_func_extended) { + if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) { // Dual source alpha glBindFragDataLocationIndexed(program, 0, 0, "fragColor0"); glBindFragDataLocationIndexed(program, 0, 1, "fragColor1"); diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 30aa49ebe4..e1cba640be 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -132,7 +132,7 @@ static const GLushort logicOps[] = { static GLenum toDualSource(GLenum blendfunc) { switch (blendfunc) { -#ifndef USING_GLES2 +#if !defined(USING_GLES2) // TODO: Remove when we have better headers case GL_SRC_ALPHA: return GL_SRC1_ALPHA; case GL_ONE_MINUS_SRC_ALPHA: @@ -168,7 +168,7 @@ static inline bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margi } bool TransformDrawEngine::ApplyShaderBlending() { - if (gl_extensions.ANY_shader_framebuffer_fetch) { + if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) { return true; } @@ -212,6 +212,7 @@ inline void TransformDrawEngine::ResetShaderBlending() { } } +// Try to simulate some common logic ops. void TransformDrawEngine::ApplyStencilReplaceAndLogicOp(ReplaceAlphaType replaceAlphaWithStencil) { StencilValueType stencilType = STENCIL_VALUE_KEEP; if (replaceAlphaWithStencil == REPLACE_ALPHA_YES) { @@ -222,59 +223,60 @@ void TransformDrawEngine::ApplyStencilReplaceAndLogicOp(ReplaceAlphaType replace GLenum srcBlend = GL_ONE; GLenum dstBlend = GL_ZERO; GLenum blendOp = GL_FUNC_ADD; -#if defined(USING_GLES2) - if (gstate.isLogicOpEnabled()) { - switch (gstate.getLogicOp()) - { - case GE_LOGIC_CLEAR: - srcBlend = GL_ZERO; - break; - case GE_LOGIC_AND: - case GE_LOGIC_AND_REVERSE: - WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_COPY: - // This is the same as off. - break; - case GE_LOGIC_COPY_INVERTED: - // Handled in the shader. - break; - case GE_LOGIC_AND_INVERTED: - case GE_LOGIC_NOR: - case GE_LOGIC_NAND: - case GE_LOGIC_EQUIV: - // Handled in the shader. 
- WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_INVERTED: - srcBlend = GL_ONE; - dstBlend = GL_ONE; - blendOp = GL_FUNC_SUBTRACT; - WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_NOOP: - srcBlend = GL_ZERO; - dstBlend = GL_ONE; - break; - case GE_LOGIC_XOR: - WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_OR: - case GE_LOGIC_OR_INVERTED: - // Inverted in shader. - dstBlend = GL_ONE; - WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_OR_REVERSE: - WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_SET: - dstBlend = GL_ONE; - WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp()); - break; + + if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) { + if (gstate.isLogicOpEnabled()) { + switch (gstate.getLogicOp()) + { + case GE_LOGIC_CLEAR: + srcBlend = GL_ZERO; + break; + case GE_LOGIC_AND: + case GE_LOGIC_AND_REVERSE: + WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_COPY: + // This is the same as off. + break; + case GE_LOGIC_COPY_INVERTED: + // Handled in the shader. + break; + case GE_LOGIC_AND_INVERTED: + case GE_LOGIC_NOR: + case GE_LOGIC_NAND: + case GE_LOGIC_EQUIV: + // Handled in the shader. + WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_INVERTED: + srcBlend = GL_ONE; + dstBlend = GL_ONE; + blendOp = GL_FUNC_SUBTRACT; + WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_NOOP: + srcBlend = GL_ZERO; + dstBlend = GL_ONE; + break; + case GE_LOGIC_XOR: + WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_OR: + case GE_LOGIC_OR_INVERTED: + // Inverted in shader. + dstBlend = GL_ONE; + WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_OR_REVERSE: + WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_SET: + dstBlend = GL_ONE; + WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp()); + break; + } } } -#endif // We're not blending, but we may still want to blend for stencil. // This is only useful for INCR/DECR/INVERT. Others can write directly. 
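Aside, not part of the patch: a quick check that the blend-factor fallback above really stands in for the logic ops it names, using the fixed-function blend equation. All names below come from the hunk above.

// The hardware computes   result = src * srcBlend  (op)  dst * dstBlend.
// GE_LOGIC_NOOP     -> srcBlend = GL_ZERO, dstBlend = GL_ONE            => result = dst       (framebuffer untouched)
// GE_LOGIC_CLEAR    -> srcBlend = GL_ZERO, dstBlend = GL_ZERO (default) => result = 0         (cleared)
// GE_LOGIC_INVERTED -> GL_ONE / GL_ONE with GL_FUNC_SUBTRACT            => result = src - dst, approximating ~dst (exact when the incoming color is white)
// Ops with no blend equivalent (AND, XOR, OR_REVERSE, ...) keep the default factors and are merely reported via WARN_LOG_REPORT_ONCE.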
@@ -418,7 +420,7 @@ void TransformDrawEngine::ApplyBlendState() { } } - if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) { + if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE && gstate_c.Supports(GPU_SUPPORTS_DUALSOURCE_BLEND)) { glBlendFuncA = toDualSource(glBlendFuncA); glBlendFuncB = toDualSource(glBlendFuncB); } @@ -567,7 +569,7 @@ void TransformDrawEngine::ApplyBlendState() { glstate.blendFuncSeparate.set(glBlendFuncA, glBlendFuncB, GL_ZERO, GL_ONE); } - if (gl_extensions.EXT_blend_minmax || gl_extensions.GLES3) { + if (gstate_c.Supports(GPU_SUPPORTS_BLEND_MINMAX)) { glstate.blendEquationSeparate.set(eqLookup[blendFuncEq], alphaEq); } else { glstate.blendEquationSeparate.set(eqLookupNoMinMax[blendFuncEq], alphaEq); @@ -605,9 +607,11 @@ void TransformDrawEngine::ApplyDrawState(int prim) { glstate.dither.disable(); if (gstate.isModeClear()) { -#if !defined(USING_GLES2) - // Logic Ops - glstate.colorLogicOp.disable(); +#ifndef USING_GLES2 + if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) { + // Logic Ops + glstate.colorLogicOp.disable(); + } #endif // Culling glstate.cullFace.disable(); @@ -639,13 +643,16 @@ void TransformDrawEngine::ApplyDrawState(int prim) { glstate.stencilTest.disable(); } } else { -#if !defined(USING_GLES2) - // Logic Ops - if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) { - glstate.colorLogicOp.enable(); - glstate.logicOp.set(logicOps[gstate.getLogicOp()]); - } else { - glstate.colorLogicOp.disable(); +#ifndef USING_GLES2 + if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) { + // TODO: Make this dynamic + // Logic Ops + if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) { + glstate.colorLogicOp.enable(); + glstate.logicOp.set(logicOps[gstate.getLogicOp()]); + } else { + glstate.colorLogicOp.disable(); + } } #endif // Set cull diff --git a/GPU/GLES/StencilBuffer.cpp b/GPU/GLES/StencilBuffer.cpp index cc1195805a..6cfd6ad52e 100644 --- a/GPU/GLES/StencilBuffer.cpp +++ b/GPU/GLES/StencilBuffer.cpp @@ -22,11 +22,11 @@ #include "GPU/GLES/ShaderManager.h" #include "GPU/GLES/TextureCache.h" -static const char *stencil_fs = -#ifdef USING_GLES2 +static const char *gles_prefix = "#version 100\n" -"precision highp float;\n" -#endif +"precision highp float;\n"; + +static const char *stencil_fs = "varying vec2 v_texcoord0;\n" "uniform float u_stencilValue;\n" "uniform sampler2D tex;\n" @@ -39,10 +39,6 @@ static const char *stencil_fs = "}\n"; static const char *stencil_vs = -#ifdef USING_GLES2 -"#version 100\n" -"precision highp float;\n" -#endif "attribute vec4 a_position;\n" "attribute vec2 a_texcoord0;\n" "varying vec2 v_texcoord0;\n" @@ -51,6 +47,14 @@ static const char *stencil_vs = " gl_Position = a_position;\n" "}\n"; +std::string GLSLES100PrefixProgram(std::string code) { + if (gl_extensions.IsGLES) { + return std::string(gles_prefix) + code; + } else { + return code; + } +} + static u8 StencilBits5551(const u8 *ptr8, u32 numPixels) { const u32 *ptr = (const u32 *)ptr8; @@ -146,7 +150,7 @@ bool FramebufferManager::NotifyStencilUpload(u32 addr, int size, bool skipZero) if (!stencilUploadProgram_) { std::string errorString; - stencilUploadProgram_ = glsl_create_source(stencil_vs, stencil_fs, &errorString); + stencilUploadProgram_ = glsl_create_source(GLSLES100PrefixProgram(stencil_vs).c_str(), GLSLES100PrefixProgram(stencil_fs).c_str(), &errorString); if (!stencilUploadProgram_) { ERROR_LOG_REPORT(G3D, "Failed to compile stencilUploadProgram! 
This shouldn't happen.\n%s", errorString.c_str()); } else { @@ -166,20 +170,8 @@ bool FramebufferManager::NotifyStencilUpload(u32 addr, int size, bool skipZero) glstate.stencilTest.enable(); glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE); - bool useBlit = false; - bool useNV = false; - -#ifndef USING_GLES2 - if (gl_extensions.FBO_ARB) { - useNV = false; - useBlit = true; - } -#else - if (gl_extensions.GLES3 || gl_extensions.NV_framebuffer_blit) { - useNV = !gl_extensions.GLES3; - useBlit = true; - } -#endif + bool useBlit = gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT); + bool useNV = useBlit && !gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT); // Our fragment shader (and discard) is slow. Since the source is 1x, we can stencil to 1x. // Then after we're done, we'll just blit it across and stretch it there. diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index d953087a89..aba39f7ad1 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -66,6 +66,7 @@ #define GL_UNPACK_ROW_LENGTH 0x0CF2 #endif +// Hack! extern int g_iNumVideos; TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), texelsScaledThisFrame_(0) { @@ -718,20 +719,23 @@ void TextureCache::UpdateSamplingParams(TexCacheEntry &entry, bool force) { if (entry.maxLevel != 0) { if (force || entry.lodBias != lodBias) { + if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) { + GETexLevelMode mode = gstate.getTexLevelMode(); + switch (mode) { + case GE_TEXLEVEL_MODE_AUTO: + // TODO + break; + case GE_TEXLEVEL_MODE_CONST: + // Sigh, LOD_BIAS is not even in ES 3.0.. #ifndef USING_GLES2 - GETexLevelMode mode = gstate.getTexLevelMode(); - switch (mode) { - case GE_TEXLEVEL_MODE_AUTO: - // TODO - break; - case GE_TEXLEVEL_MODE_CONST: - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_LOD_BIAS, lodBias); - break; - case GE_TEXLEVEL_MODE_SLOPE: - // TODO - break; - } + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_LOD_BIAS, lodBias); #endif + break; + case GE_TEXLEVEL_MODE_SLOPE: + // TODO + break; + } + } entry.lodBias = lodBias; } } @@ -1418,8 +1422,7 @@ void TextureCache::SetTexture(bool force) { break; } -#ifndef USING_GLES2 - if (i > 0) { + if (i > 0 && gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) { int tw = gstate.getTextureWidth(i); int th = gstate.getTextureHeight(i); if (tw != 1 && tw != (gstate.getTextureWidth(i - 1) >> 1)) @@ -1427,7 +1430,6 @@ void TextureCache::SetTexture(bool force) { else if (th != 1 && th != (gstate.getTextureHeight(i - 1) >> 1)) badMipSizes = true; } -#endif } // In addition, simply don't load more than level 0 if g_Config.bMipMap is false. @@ -1446,14 +1448,15 @@ void TextureCache::SetTexture(bool force) { scaleFactor = (PSP_CoreParameter().renderWidth + 479) / 480; } -#ifndef MOBILE_DEVICE - scaleFactor = std::min(gl_extensions.OES_texture_npot ? 5 : 4, scaleFactor); - if (!gl_extensions.OES_texture_npot && scaleFactor == 3) { - scaleFactor = 2; + // Mobile devices don't get the higher scale factors, too expensive. Very rough way to decide though... + if (!gstate_c.Supports(GPU_IS_MOBILE)) { + scaleFactor = std::min(gstate_c.Supports(GPU_SUPPORTS_OES_TEXTURE_NPOT) ? 5 : 4, scaleFactor); + if (!gl_extensions.OES_texture_npot && scaleFactor == 3) { + scaleFactor = 2; + } + } else { + scaleFactor = std::min(gstate_c.Supports(GPU_SUPPORTS_OES_TEXTURE_NPOT) ? 
3 : 2, scaleFactor); } -#else - scaleFactor = std::min(gl_extensions.OES_texture_npot ? 3 : 2, scaleFactor); -#endif } else { scaleFactor = g_Config.iTexScalingLevel; } @@ -1504,34 +1507,28 @@ void TextureCache::SetTexture(bool force) { // Mipmapping only enable when texture scaling disable if (maxLevel > 0 && g_Config.iTexScalingLevel == 1) { -#ifndef USING_GLES2 - if (badMipSizes) { - // WARN_LOG(G3D, "Bad mipmap for texture sized %dx%dx%d - autogenerating", w, h, (int)format); - glGenerateMipmap(GL_TEXTURE_2D); - } else { - for (int i = 1; i <= maxLevel; i++) { - LoadTextureLevel(*entry, i, replaceImages, scaleFactor, dstFmt); + if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) { + if (badMipSizes) { + // WARN_LOG(G3D, "Bad mipmap for texture sized %dx%dx%d - autogenerating", w, h, (int)format); + glGenerateMipmap(GL_TEXTURE_2D); + } else { + for (int i = 1; i <= maxLevel; i++) { + LoadTextureLevel(*entry, i, replaceImages, scaleFactor, dstFmt); + } + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, maxLevel); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); } - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, maxLevel); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); - } -#else - // Avoid PowerVR driver bug - if (w > 1 && h > 1 && !(gl_extensions.gpuVendor == GPU_VENDOR_POWERVR && h > w)) { // Really! only seems to fail if height > width - // NOTICE_LOG(G3D, "Generating mipmap for texture sized %dx%d%d", w, h, (int)format); - glGenerateMipmap(GL_TEXTURE_2D); } else { - entry->maxLevel = 0; + // Avoid PowerVR driver bug + if (w > 1 && h > 1 && !(h > w && (gl_extensions.bugs & BUG_PVR_GENMIPMAP_HEIGHT_GREATER))) { // Really! only seems to fail if height > width + // NOTICE_LOG(G3D, "Generating mipmap for texture sized %dx%d%d", w, h, (int)format); + glGenerateMipmap(GL_TEXTURE_2D); + } else { + entry->maxLevel = 0; + } } -#endif - } else { -#ifndef USING_GLES2 + } else if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); -#else - if (gl_extensions.GLES3) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - } -#endif } int aniso = 1 << g_Config.iAnisotropyLevel; @@ -1698,7 +1695,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c case GE_TFMT_8888: if (!swizzled) { // Special case: if we don't need to deal with packing, we don't need to copy. - if ((g_Config.iTexScalingLevel == 1 && gl_extensions.EXT_unpack_subimage) || w == bufw) { + if ((g_Config.iTexScalingLevel == 1 && gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE)) || w == bufw) { if (UseBGRA8888()) { tmpTexBuf32.resize(std::max(bufw, w) * h); finalBuf = tmpTexBuf32.data(); @@ -1791,7 +1788,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c ERROR_LOG_REPORT(G3D, "NO finalbuf! Will crash!"); } - if (!(g_Config.iTexScalingLevel == 1 && gl_extensions.EXT_unpack_subimage) && w != bufw) { + if (!(g_Config.iTexScalingLevel == 1 && gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE)) && w != bufw) { int pixelSize; switch (dstFmt) { case GL_UNSIGNED_SHORT_4_4_4_4: @@ -1803,6 +1800,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c pixelSize = 4; break; } + // Need to rearrange the buffer to simulate GL_UNPACK_ROW_LENGTH etc. 
int inRowBytes = bufw * pixelSize; int outRowBytes = w * pixelSize; @@ -1868,7 +1866,7 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac gpuStats.numTexturesDecoded++; // Can restore these and remove the fixup at the end of DecodeTextureLevel on desktop GL and GLES 3. - if ((g_Config.iTexScalingLevel == 1 && gl_extensions.EXT_unpack_subimage) && w != bufw) { + if ((g_Config.iTexScalingLevel == 1 && gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE)) && w != bufw) { glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw); useUnpack = true; } diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 2994fe26bd..a3a8b8b7f6 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -777,11 +777,7 @@ rotateVBO: SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded); if (useElements) { -#if 1 // USING_GLES2 glDrawElements(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex); -#else - glDrawRangeElements(glprim[prim], 0, maxIndex, vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex); -#endif } else { glDrawArrays(glprim[prim], 0, vertexCount); } @@ -830,12 +826,7 @@ rotateVBO: if (attrMask & (1 << ATTR_COLOR0)) glVertexAttribPointer(ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + offsetof(TransformedVertex, color0)); if (attrMask & (1 << ATTR_COLOR1)) glVertexAttribPointer(ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + offsetof(TransformedVertex, color1)); if (drawIndexed) { -#if 1 // USING_GLES2 glDrawElements(glprim[prim], numTrans, GL_UNSIGNED_SHORT, inds); -#else - // This doesn't seem to provide much of a win. - glDrawRangeElements(glprim[prim], 0, maxIndex, numTrans, GL_UNSIGNED_SHORT, inds); -#endif } else { glDrawArrays(glprim[prim], 0, numTrans); } diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 379043ca0b..9c25ddb1f9 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -192,48 +192,38 @@ void GenerateVertexShader(int prim, u32 vertType, char *buffer, bool useHWTransf bool highpFog = false; bool highpTexcoord = false; -#if defined(USING_GLES2) - // Let's wait until we have a real use for this. - // ES doesn't support dual source alpha :( - if (gl_extensions.GLES3) { - WRITE(p, "#version 300 es\n"); - glslES30 = true; + if (gl_extensions.IsGLES) { + // ES doesn't support dual source alpha :( + if (gl_extensions.GLES3) { + WRITE(p, "#version 300 es\n"); + glslES30 = true; + } else { + WRITE(p, "#version 100\n"); // GLSL ES 1.0 + } + WRITE(p, "precision highp float;\n"); + + // PowerVR needs highp to do the fog in MHU correctly. + // Others don't, and some can't handle highp in the fragment shader. + highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; + highpTexcoord = highpFog; } else { - WRITE(p, "#version 100\n"); // GLSL ES 1.0 - } - WRITE(p, "precision highp float;\n"); - - // PowerVR needs highp to do the fog in MHU correctly. - // Others don't, and some can't handle highp in the fragment shader. - highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; - highpTexcoord = highpFog; - -#elif !defined(FORCE_OPENGL_2_0) + // TODO: Handle this in VersionGEThan? 
+#if !defined(FORCE_OPENGL_2_0) if (gl_extensions.VersionGEThan(3, 3, 0)) { glslES30 = true; WRITE(p, "#version 330\n"); - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); } else if (gl_extensions.VersionGEThan(3, 0, 0)) { WRITE(p, "#version 130\n"); - // Remove lowp/mediump in non-mobile non-glsl 3 implementations - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); } else { WRITE(p, "#version 110\n"); - // Remove lowp/mediump in non-mobile non-glsl 3 implementations + } +#endif + + // We remove these everywhere - GL4, GL3, Mac-forced-GL2, etc. WRITE(p, "#define lowp\n"); WRITE(p, "#define mediump\n"); WRITE(p, "#define highp\n"); } -#else - // Need to remove lowp/mediump for Mac - WRITE(p, "#define lowp\n"); - WRITE(p, "#define mediump\n"); - WRITE(p, "#define highp\n"); -#endif if (glslES30) { attribute = "in"; diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index cc60d15080..fa1288fd41 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -18,6 +18,7 @@ class GPUCommon : public GPUThreadEventQueue, public GPUDebugInterface { public: GPUCommon(); virtual ~GPUCommon(); + virtual void Reinitialize(); virtual void InterruptStart(int listid); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 8a65db7804..9099d846b7 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -442,7 +442,33 @@ enum TextureChangeReason { TEXCHANGE_PARAMSONLY = 0x02, }; +#define FLAG_BIT(x) (1 << x) + +// Some of these are OpenGL-specific even though this file is neutral, unfortunately. +// Might want to move this mechanism into the backend later. +enum { + GPU_SUPPORTS_DUALSOURCE_BLEND = FLAG_BIT(0), + GPU_SUPPORTS_GLSL_ES_300 = FLAG_BIT(1), + GPU_SUPPORTS_GLSL_330 = FLAG_BIT(2), + GPU_SUPPORTS_UNPACK_SUBIMAGE = FLAG_BIT(3), + GPU_SUPPORTS_BLEND_MINMAX = FLAG_BIT(4), + GPU_SUPPORTS_LOGIC_OP = FLAG_BIT(5), + GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), + GPU_SUPPORTS_TEXTURE_LOD_CONTROL = FLAG_BIT(24), + GPU_SUPPORTS_FBO = FLAG_BIT(25), + GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT = FLAG_BIT(26), + GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT = FLAG_BIT(27), + GPU_SUPPORTS_OES_TEXTURE_NPOT = FLAG_BIT(28), + GPU_IS_MOBILE = FLAG_BIT(29), + GPU_PREFER_CPU_DOWNLOAD = FLAG_BIT(30), + GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), +}; + struct GPUStateCache { + bool Supports(int flag) { return (featureFlags & flag) != 0; } + + u32 featureFlags; + u32 vertexAddr; u32 indexAddr; u32 offsetAddr; diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 61bfce2618..514e729751 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -365,11 +365,11 @@ void SystemInfoScreen::CreateViews() { deviceSpecs->Add(new ItemHeader("Version Information")); std::string apiVersion; if (g_Config.iGPUBackend == GPU_BACKEND_OPENGL) { -#ifdef USING_GLES2 - apiVersion = StringFromFormat("v%d.%d.%d ES", gl_extensions.ver[0], gl_extensions.ver[1], gl_extensions.ver[2]); -#else - apiVersion = StringFromFormat("v%d.%d.%d", gl_extensions.ver[0], gl_extensions.ver[1], gl_extensions.ver[2]); -#endif + if (gl_extensions.IsGLES) { + apiVersion = StringFromFormat("v%d.%d.%d ES", gl_extensions.ver[0], gl_extensions.ver[1], gl_extensions.ver[2]); + } else { + apiVersion = StringFromFormat("v%d.%d.%d", gl_extensions.ver[0], gl_extensions.ver[1], gl_extensions.ver[2]); + } } else { apiVersion = thin3d->GetInfoString(T3DInfo::APIVERSION); if (apiVersion.size() > 30) @@ -413,14 +413,13 @@ void SystemInfoScreen::CreateViews() { tabHolder->AddTab("OGL Extensions", oglExtensionsScroll); -#ifndef 
USING_GLES2 - oglExtensions->Add(new ItemHeader("OpenGL Extensions")); -#else - if (gl_extensions.GLES3) + if (!gl_extensions.IsGLES) { + oglExtensions->Add(new ItemHeader("OpenGL Extensions")); + } else if (gl_extensions.GLES3) { oglExtensions->Add(new ItemHeader("OpenGL ES 3.0 Extensions")); - else + } else { oglExtensions->Add(new ItemHeader("OpenGL ES 2.0 Extensions")); -#endif + } exts.clear(); SplitString(g_all_gl_extensions, ' ', exts); diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 8b2db8bdc0..f218ae5d43 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -923,20 +923,20 @@ void EmuScreen::render() { screenManager()->getUIContext()->End(); } -#ifdef USING_GLES2 // We have no use for backbuffer depth or stencil, so let tiled renderers discard them after tiling. if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) { GLenum attachments[2] = { GL_DEPTH, GL_STENCIL }; glInvalidateFramebuffer(GL_FRAMEBUFFER, 2, attachments); } else if (!gl_extensions.GLES3) { +#ifdef USING_GLES2 // Tiled renderers like PowerVR should benefit greatly from this. However - seems I can't call it? bool hasDiscard = gl_extensions.EXT_discard_framebuffer; // TODO if (hasDiscard) { //const GLenum targets[3] = { GL_COLOR_EXT, GL_DEPTH_EXT, GL_STENCIL_EXT }; //glDiscardFramebufferEXT(GL_FRAMEBUFFER, 3, targets); } - } #endif + } } void EmuScreen::deviceLost() { diff --git a/native b/native index da38466d29..f4f2a95067 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit da38466d29a8db045ad56d1fe00601bf43b88d57 +Subproject commit f4f2a950678b9d46e3f1babe54cf9df75f72f91c
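For readers skimming the diff, the mechanism introduced above reduces to one call-site pattern. The sketch below is condensed from the GPUState.h, GLES_GPU.cpp and StateMapping.cpp hunks; it is an illustration only, not an additional change.

// CheckGPUFeatures() translates gl_extensions (plus a few vendor/config checks)
// into gstate_c.featureFlags at startup, and again in BeginFrameInternal() when
// resized_ is set after returning from the settings screen.
// Converted call sites then branch at runtime instead of via #ifdef USING_GLES2:
if (gstate_c.Supports(GPU_SUPPORTS_BLEND_MINMAX)) {
	glstate.blendEquationSeparate.set(eqLookup[blendFuncEq], alphaEq);          // GL_MIN/GL_MAX usable
} else {
	glstate.blendEquationSeparate.set(eqLookupNoMinMax[blendFuncEq], alphaEq);  // fall back to supported equations
}

Because the flags are plain bits in a u32, a single Supports() call can test several capabilities at once; BlitFramebufferDepth and NotifyStencilUpload use this with GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT to mean "either blit path will do".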