diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp
index d142f8b9a4..16bd8568a7 100644
--- a/GPU/GLES/DepthBufferGLES.cpp
+++ b/GPU/GLES/DepthBufferGLES.cpp
@@ -16,10 +16,62 @@
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
 #include <algorithm>
-
+#include "gfx_es2/gpu_features.h"
+#include "Core/ConfigValues.h"
 #include "Core/Reporting.h"
 #include "GPU/Common/GPUStateUtils.h"
+#include "GPU/GLES/DrawEngineGLES.h"
 #include "GPU/GLES/FramebufferManagerGLES.h"
+#include "GPU/GLES/ShaderManagerGLES.h"
+#include "GPU/GLES/TextureCacheGLES.h"
+
+// Fragment shader for the color-path depth download: samples the depth texture,
+// rescales it using u_depthScaleFactor, and writes the 16-bit result into the color
+// output (low byte in R, high byte in G) so it can be read back as RGBA8.
+static const char *depth_dl_fs = R"(
+#ifdef GL_ES
+precision highp float;
+#endif
+#if __VERSION__ >= 130
+#define varying in
+#define texture2D texture
+#define gl_FragColor fragColor0
+out vec4 fragColor0;
+#endif
+varying vec2 v_texcoord0;
+uniform float u_depthScaleFactor;
+uniform sampler2D tex;
+void main() {
+  float depth = texture2D(tex, v_texcoord0).r;
+  float offset = 0.5 * (u_depthScaleFactor - 1.0) * (1.0 / u_depthScaleFactor);
+  // At this point, clamped maps [0, 1] to [0, 65535].
+  float clamped = clamp((depth - offset) * u_depthScaleFactor, 0.0, 1.0);
+
+  vec4 enc = vec4(16777215.0, 16777215.0 / 256.0, 16777215.0 / 65536.0, 16777215.0 / 16777216.0) * clamped;
+  enc = floor(mod(enc, 256.0));
+  enc = enc * vec4(1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0);
+  // Let's ignore the bits outside 16 bit precision.
+  gl_FragColor = enc.yzww;
+}
+)";
+
+// Pass-through vertex shader for the depth download draw: forwards position and texcoord unchanged.
+static const char *depth_vs = R"(
+#ifdef GL_ES
+precision highp float;
+#endif
+#if __VERSION__ >= 130
+#define attribute in
+#define varying out
+#endif
+attribute vec4 a_position;
+attribute vec2 a_texcoord0;
+varying vec2 v_texcoord0;
+void main() {
+  v_texcoord0 = a_texcoord0;
+  gl_Position = a_position;
+}
+)";
 
 void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
 	if (!vfb->fbo) {
@@ -40,25 +92,107 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
 
 	DEBUG_LOG(FRAMEBUF, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address);
 
-	draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, y, packWidth, h, Draw::DataFormat::D32F, convBuf_, vfb->z_stride);
+	// TODO: On desktop, we can just directly download, but for now testing.
+	const bool useColorPath = true; // gl_extensions.IsGLES;
+	bool format16Bit = false;
+
+	if (useColorPath) {
+		// Color path: draw the depth texture through depth_dl_fs into a temporary
+		// 8888 color target, then read that color buffer back instead of the depth buffer.
+		if (!depthDownloadProgram_) {
+			// Build the download program the first time it is needed.
+			// NOTE: errorString is never filled in here, so the report below carries no detail.
+			std::string errorString;
+			static std::string vs_code, fs_code;
+			vs_code = ApplyGLSLPrelude(depth_vs, GL_VERTEX_SHADER);
+			fs_code = ApplyGLSLPrelude(depth_dl_fs, GL_FRAGMENT_SHADER);
+			std::vector<GLRShader *> shaders;
+			shaders.push_back(render_->CreateShader(GL_VERTEX_SHADER, vs_code, "depth_dl"));
+			shaders.push_back(render_->CreateShader(GL_FRAGMENT_SHADER, fs_code, "depth_dl"));
+			std::vector<GLRProgram::Semantic> semantics;
+			semantics.push_back({ 0, "a_position" });
+			semantics.push_back({ 1, "a_texcoord0" });
+			std::vector<GLRProgram::UniformLocQuery> queries;
+			queries.push_back({ &u_depthDownloadTex, "tex" });
+			queries.push_back({ &u_depthDownloadFactor, "u_depthScaleFactor" });
+			std::vector<GLRProgram::Initializer> inits;
+			inits.push_back({ &u_depthDownloadTex, 0, TEX_SLOT_PSP_TEXTURE });
+			depthDownloadProgram_ = render_->CreateProgram(shaders, semantics, queries, inits, false);
+			for (auto iter : shaders) {
+				render_->DeleteShader(iter);
+			}
+			if (!depthDownloadProgram_) {
+				ERROR_LOG_REPORT(G3D, "Failed to compile depthDownloadProgram! This shouldn't happen.\n%s", errorString.c_str());
+				// No render state has been touched yet, so bail out rather than bind a null program.
+				return;
+			}
+		}
+
+		shaderManagerGL_->DirtyLastShader();
+		auto *blitFBO = GetTempFBO(TempFBO::COPY, vfb->renderWidth, vfb->renderHeight, Draw::FBO_8888);
+		draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE });
+		render_->SetViewport({ 0, 0, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f });
+		textureCacheGL_->ForgetLastTexture();
+
+		// We must bind the program after starting the render pass, and set the color mask after clearing.
+		render_->SetScissor({ 0, 0, vfb->renderWidth, vfb->renderHeight });
+		render_->SetDepth(false, false, GL_ALWAYS);
+		render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
+		render_->BindProgram(depthDownloadProgram_);
+
+		// A factor of 1.0 makes the shader's offset zero (no rescale); with accurate
+		// depth support the shader rescales by DepthSliceFactor() instead.
+		if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
+			render_->SetUniformF1(&u_depthDownloadFactor, 1.0f);
+		} else {
+			render_->SetUniformF1(&u_depthDownloadFactor, DepthSliceFactor());
+		}
+		draw_->BindFramebufferAsTexture(vfb->fbo, TEX_SLOT_PSP_TEXTURE, Draw::FB_DEPTH_BIT, 0);
+		float u1 = 1.0f;
+		float v1 = 1.0f;
+		DrawActiveTexture(x, y, w, h, vfb->renderWidth, vfb->renderHeight, 0.0f, 0.0f, u1, v1, ROTATION_LOCKED_HORIZONTAL, DRAWTEX_NEAREST);
+
+		draw_->CopyFramebufferToMemorySync(blitFBO, Draw::FB_COLOR_BIT, 0, y, packWidth, h, Draw::DataFormat::R8G8B8A8_UNORM, convBuf_, vfb->z_stride);
+		// TODO: Use 4444 so we can copy lines directly?
+		format16Bit = true;
+	} else {
+		draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, y, packWidth, h, Draw::DataFormat::D32F, convBuf_, vfb->z_stride);
+		format16Bit = false;
+	}
 
 	int dstByteOffset = y * vfb->z_stride * sizeof(u16);
 	u16 *depth = (u16 *)Memory::GetPointer(z_address + dstByteOffset);
-	GLfloat *packed = (GLfloat *)convBuf_;
+	u32_le *packed32 = (u32_le *)convBuf_;
+	GLfloat *packedf = (GLfloat *)convBuf_;
 
 	int totalPixels = h == 1 ? packWidth : vfb->z_stride * h;
-	for (int yp = 0; yp < h; ++yp) {
-		int row_offset = vfb->z_stride * yp;
-		for (int xp = 0; xp < packWidth; ++xp) {
-			const int i = row_offset + xp;
-			float scaled = FromScaledDepth(packed[i]);
-			if (scaled <= 0.0f) {
-				depth[i] = 0;
-			} else if (scaled >= 65535.0f) {
-				depth[i] = 65535;
-			} else {
-				depth[i] = (int)scaled;
+	if (format16Bit) {
+		// The shader already produced the final value: keep the low 16 bits of each 32-bit texel.
+		for (int yp = 0; yp < h; ++yp) {
+			int row_offset = vfb->z_stride * yp;
+			for (int xp = 0; xp < packWidth; ++xp) {
+				const int i = row_offset + xp;
+				depth[i] = packed32[i] & 0xFFFF;
+			}
+		}
+	} else {
+		for (int yp = 0; yp < h; ++yp) {
+			int row_offset = vfb->z_stride * yp;
+			for (int xp = 0; xp < packWidth; ++xp) {
+				const int i = row_offset + xp;
+				float scaled = FromScaledDepth(packedf[i]);
+				if (scaled <= 0.0f) {
+					depth[i] = 0;
+				} else if (scaled >= 65535.0f) {
+					depth[i] = 65535;
+				} else {
+					depth[i] = (int)scaled;
+				}
 			}
 		}
 	}
+
+	// Mark the cached blend/raster/depth-stencil/viewport-scissor state dirty, since the
+	// color path above sets its own scissor, depth and raster state directly.
+	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
 }
diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp
index 144ca24dc7..6d9887b422 100644
--- a/GPU/GLES/FramebufferManagerGLES.cpp
+++ b/GPU/GLES/FramebufferManagerGLES.cpp
@@ -324,6 +324,10 @@ void FramebufferManagerGLES::DestroyDeviceObjects() {
 		render_->DeleteProgram(stencilUploadProgram_);
 		stencilUploadProgram_ = nullptr;
 	}
+	if (depthDownloadProgram_) {
+		render_->DeleteProgram(depthDownloadProgram_);
+		depthDownloadProgram_ = nullptr;
+	}
 }
 
 FramebufferManagerGLES::~FramebufferManagerGLES() {
diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h
index 8c0be0ac1e..db10d01583 100644
--- a/GPU/GLES/FramebufferManagerGLES.h
+++ b/GPU/GLES/FramebufferManagerGLES.h
@@ -103,6 +103,11 @@ private:
 	int u_stencilUploadTex = -1;
 	int u_stencilValue = -1;
 	int u_postShaderTex = -1;
+
+	// Depth download (color path) program and its uniform locations.
+	GLRProgram *depthDownloadProgram_ = nullptr;
+	int u_depthDownloadTex = -1;
+	int u_depthDownloadFactor = -1;
 
 	// Cached uniform locs
 	int u_draw2d_tex = -1;
diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp
index 17d7469dfa..164e7ae6d0 100644
--- a/GPU/GLES/StencilBufferGLES.cpp
+++ b/GPU/GLES/StencilBufferGLES.cpp
@@ -20,6 +20,7 @@
 #include "Core/ConfigValues.h"
 #include "Core/Reporting.h"
 #include "GPU/Common/StencilCommon.h"
+#include "GPU/GLES/DrawEngineGLES.h"
 #include "GPU/GLES/FramebufferManagerGLES.h"
 #include "GPU/GLES/ShaderManagerGLES.h"
 #include "GPU/GLES/TextureCacheGLES.h"
@@ -137,7 +138,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe
 		queries.push_back({ &u_stencilUploadTex, "tex" });
 		queries.push_back({ &u_stencilValue, "u_stencilValue" });
 		std::vector<GLRProgram::Initializer> inits;
-		inits.push_back({ &u_stencilUploadTex, 0, 0 });
+		inits.push_back({ &u_stencilUploadTex, 0, TEX_SLOT_PSP_TEXTURE });
 		stencilUploadProgram_ = render_->CreateProgram(shaders, semantics, queries, inits, false);
 		for (auto iter : shaders) {
 			render_->DeleteShader(iter);
diff --git a/ext/native/thin3d/GLQueueRunner.cpp b/ext/native/thin3d/GLQueueRunner.cpp
index 6ecae517a1..cf4dd780dd 100644
--- a/ext/native/thin3d/GLQueueRunner.cpp
+++ b/ext/native/thin3d/GLQueueRunner.cpp
@@ -249,6 +249,7 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps, bool ski
 					switch (init.type) {
 					case 0:
 						glUniform1i(uniform, init.value);
+						break;
 					}
 				}
 			}
@@ -951,10 +952,14 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
 			}
 			if (c.bind_fb_texture.aspect == GL_COLOR_BUFFER_BIT) {
 				if (curTex[slot] != &c.bind_fb_texture.framebuffer->color_texture)
-				glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->color_texture.texture);
+					glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->color_texture.texture);
 				curTex[slot] = &c.bind_fb_texture.framebuffer->color_texture;
+			} else if (c.bind_fb_texture.aspect == GL_DEPTH_BUFFER_BIT) {
+				if (curTex[slot] != &c.bind_fb_texture.framebuffer->z_stencil_texture)
+					glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->z_stencil_texture.texture);
+				curTex[slot] = &c.bind_fb_texture.framebuffer->z_stencil_texture;
 			} else {
-				// TODO: Depth texturing?
+				// TODO: Stencil texturing?
 				curTex[slot] = nullptr;
 			}
 			CHECK_GL_ERROR_IF_DEBUG();