diff --git a/CMakeLists.txt b/CMakeLists.txt index dad23f4d8d..028b59e23e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1269,6 +1269,7 @@ add_library(GPU OBJECT GPU/GLES/Spline.cpp GPU/GLES/StateMapping.cpp GPU/GLES/StateMapping.h + GPU/GLES/StencilBuffer.cpp GPU/GLES/TextureCache.cpp GPU/GLES/TextureCache.h GPU/GLES/TextureScaler.cpp diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index f11335a76d..ac7a596874 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -1330,6 +1330,10 @@ bool DIRECTX9_GPU::PerformMemoryDownload(u32 dest, int size) { return false; } +bool DIRECTX9_GPU::PerformStencilUpload(u32 dest, int size) { + return false; +} + void DIRECTX9_GPU::ClearCacheNextFrame() { textureCache_.ClearNextFrame(); } diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index a0124529af..35ff0dc4ba 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -49,6 +49,7 @@ public: virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual bool PerformMemoryDownload(u32 dest, int size); + virtual bool PerformStencilUpload(u32 dest, int size); virtual void ClearCacheNextFrame(); virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders. 
diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index d82642408e..179de778c3 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -101,7 +101,7 @@ enum { FBO_OLD_AGE = 5, }; -static bool MaskedEqual(u32 addr1, u32 addr2) { +bool FramebufferManager::MaskedEqual(u32 addr1, u32 addr2) { return (addr1 & 0x03FFFFFF) == (addr2 & 0x03FFFFFF); } @@ -160,7 +160,7 @@ void CenterRect(float *x, float *y, float *w, float *h, *h = outH; } -static void ClearBuffer() { +void FramebufferManager::ClearBuffer() { glstate.scissorTest.disable(); glstate.depthWrite.set(GL_TRUE); glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -175,7 +175,7 @@ static void ClearBuffer() { glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); } -static void DisableState() { +void FramebufferManager::DisableState() { glstate.blend.disable(); glstate.cullFace.disable(); glstate.depthTest.disable(); @@ -322,6 +322,7 @@ FramebufferManager::FramebufferManager() : convBuf_(0), draw2dprogram_(0), postShaderProgram_(0), + stencilUploadProgram_(0), plainColorLoc_(-1), timeLoc_(-1), textureCache_(0), @@ -361,6 +362,9 @@ FramebufferManager::~FramebufferManager() { if (draw2dprogram_) { glsl_destroy(draw2dprogram_); } + if (stencilUploadProgram_) { + glsl_destroy(stencilUploadProgram_); + } SetNumExtraFBOs(0); for (auto it = renderCopies_.begin(), end = renderCopies_.end(); it != end; ++it) { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 26eee974e4..7e265c37c8 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -213,6 +213,7 @@ public: inline bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const; bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); + bool NotifyStencilUpload(u32 addr, int size); void DestroyFramebuf(VirtualFramebuffer *vfb); @@ -231,6 +232,10 @@ private: void SetNumExtraFBOs(int num); + static void DisableState(); + static void ClearBuffer(); + static 
bool MaskedEqual(u32 addr1, u32 addr2); + u32 displayFramebufPtr_; u32 displayStride_; GEBufferFormat displayFormat_; @@ -262,6 +267,7 @@ private: GLSLProgram *draw2dprogram_; GLSLProgram *plainColorProgram_; GLSLProgram *postShaderProgram_; + GLSLProgram *stencilUploadProgram_; int plainColorLoc_; int timeLoc_; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 900821d374..63a460fc37 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -672,6 +672,10 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) { PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size); break; + case GPU_EVENT_FB_STENCIL_UPLOAD: + PerformStencilUploadInternal(ev.fb_stencil_upload.dst, ev.fb_stencil_upload.size); + break; + default: GPUCommon::ProcessEvent(ev); } @@ -2002,6 +2006,10 @@ void GLES_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) { } } +void GLES_GPU::PerformStencilUploadInternal(u32 dest, int size) { + framebufferManager_.NotifyStencilUpload(dest, size); +} + bool GLES_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { // Track stray copies of a framebuffer in RAM. MotoGP does this. 
if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) { @@ -2054,6 +2062,21 @@ bool GLES_GPU::PerformMemoryDownload(u32 dest, int size) { return gpu->PerformMemoryCopy(dest ^ 0x00400000, dest, size); } +bool GLES_GPU::PerformStencilUpload(u32 dest, int size) { + if (framebufferManager_.MayIntersectFramebuffer(dest)) { + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_STENCIL_UPLOAD); + ev.fb_stencil_upload.dst = dest; + ev.fb_stencil_upload.size = size; + ScheduleEvent(ev); + } else { + PerformStencilUploadInternal(dest, size); + } + return true; + } + return false; +} + void GLES_GPU::ClearCacheNextFrame() { textureCache_.ClearNextFrame(); } diff --git a/GPU/GLES/GLES_GPU.h b/GPU/GLES/GLES_GPU.h index f60bdd73d6..85bcb032c1 100644 --- a/GPU/GLES/GLES_GPU.h +++ b/GPU/GLES/GLES_GPU.h @@ -48,6 +48,7 @@ public: virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual bool PerformMemoryDownload(u32 dest, int size); + virtual bool PerformStencilUpload(u32 dest, int size); virtual void ClearCacheNextFrame(); virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders. @@ -158,6 +159,7 @@ private: void CopyDisplayToOutputInternal(); void PerformMemoryCopyInternal(u32 dest, u32 src, int size); void PerformMemorySetInternal(u32 dest, u8 v, int size); + void PerformStencilUploadInternal(u32 dest, int size); void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type); static CommandInfo cmdInfo_[256];
diff --git a/GPU/GLES/StencilBuffer.cpp b/GPU/GLES/StencilBuffer.cpp
new file mode 100644
index 0000000000..9ffb4a5095
--- /dev/null
+++ b/GPU/GLES/StencilBuffer.cpp
@@ -0,0 +1,155 @@
+// Copyright (c) 2014- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "gfx_es2/glsl_program.h"
+#include "gfx_es2/gl_state.h"
+#include "Core/Reporting.h"
+#include "GPU/GLES/Framebuffer.h"
+
+// Fragment shader for a single stencil upload pass. Fragments whose source
+// alpha, quantized to 0-255, differs from u_stencilValue are discarded, so
+// only matching pixels reach the stencil write.
+static const char *stencil_fs =
+#ifdef USING_GLES
+"#version 100\n"
+"precision highp float;\n"
+#endif
+"varying vec2 v_texcoord0;\n"
+"uniform float u_stencilValue;\n"
+"uniform sampler2D tex;\n"
+"float roundAndScaleTo255f(in float x) { return floor(x * 255.99); }\n"
+"void main() {\n"
+"  vec4 index = texture2D(tex, v_texcoord0);\n"
+"  gl_FragColor = vec4(u_stencilValue);\n"
+"  if (roundAndScaleTo255f(u_stencilValue) != roundAndScaleTo255f(index.a)) discard;\n"
+"}\n";
+
+// Pass-through vertex shader: forwards position and texture coordinate as-is.
+static const char *stencil_vs =
+#ifdef USING_GLES
+"#version 100\n"
+"precision highp float;\n"
+#endif
+"attribute vec4 a_position;\n"
+"attribute vec2 a_texcoord0;\n"
+"varying vec2 v_texcoord0;\n"
+"void main() {\n"
+"  v_texcoord0 = a_texcoord0;\n"
+"  gl_Position = a_position;\n"
+"}\n";
+
+// Uploads stencil data for the framebuffer located at addr.
+//
+// The pixels at addr are turned into a texture and drawn once per possible
+// stencil value; each pass writes its value into the stencil buffer wherever
+// the shader keeps the fragment (source alpha matches the pass value).
+//
+// Returns false, before any render state is changed, when addr does not match
+// a known framebuffer, when the format is 565 (which has no alpha bits), or
+// when the upload shader fails to build. Returns true once the passes have
+// been drawn. size is currently unused.
+bool FramebufferManager::NotifyStencilUpload(u32 addr, int size) {
+	if (!MayIntersectFramebuffer(addr)) {
+		return false;
+	}
+
+	// Pick the framebuffer at this address (the last match wins).
+	VirtualFramebuffer *dstBuffer = 0;
+	for (size_t i = 0; i < vfbs_.size(); ++i) {
+		VirtualFramebuffer *vfb = vfbs_[i];
+		if (MaskedEqual(vfb->fb_address, addr)) {
+			dstBuffer = vfb;
+		}
+	}
+	if (!dstBuffer) {
+		return false;
+	}
+
+	// TODO: Doing it the slow way for now.
+	// The pass count is decided before any GL work so that the early return
+	// below leaves the bound program and render state alone.
+	int passes = 0;
+	switch (dstBuffer->format) {
+	case GE_FORMAT_565:
+		// Well, this doesn't make much sense.
+		return false;
+	case GE_FORMAT_5551:
+		passes = 2;
+		break;
+	case GE_FORMAT_4444:
+		passes = 16;
+		break;
+	case GE_FORMAT_8888:
+		passes = 256;
+		break;
+	}
+
+	if (!stencilUploadProgram_) {
+		std::string errorString;
+		stencilUploadProgram_ = glsl_create_source(stencil_vs, stencil_fs, &errorString);
+		if (!stencilUploadProgram_) {
+			ERROR_LOG_REPORT(G3D, "Failed to compile stencilUploadProgram! This shouldn't happen.\n%s", errorString.c_str());
+			// Nothing can be drawn without the program; bail out rather than
+			// hand a null program to the uniform lookup and draw calls below.
+			return false;
+		}
+		glsl_bind(stencilUploadProgram_);
+
+		// Sampler unit 0 is set once, right after the program is created.
+		GLint u_tex = glsl_uniform_loc(stencilUploadProgram_, "tex");
+		glUniform1i(u_tex, 0);
+	} else {
+		glsl_bind(stencilUploadProgram_);
+	}
+	gstate_c.shaderChanged = true;
+
+	MakePixelTexture(Memory::GetPointer(addr), dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
+	DisableState();
+	glstate.blend.set(true);
+	glstate.blendEquation.set(GL_FUNC_ADD);
+	glstate.blendFuncSeparate.set(GL_ZERO, GL_ONE, GL_ONE, GL_ZERO);
+	glstate.stencilTest.enable();
+	glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE);
+
+	if (dstBuffer->fbo) {
+		fbo_bind_as_render_target(dstBuffer->fbo);
+	}
+	glViewport(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);
+
+	glClearStencil(0);
+	glClear(GL_STENCIL_BUFFER_BIT);
+
+	// Spreads the pass index 0..passes-1 evenly over 0.0..1.0.
+	const float scale = 1.0f / (passes - 1);
+	GLint u_stencilValue = glsl_uniform_loc(stencilUploadProgram_, "u_stencilValue");
+	for (int i = 0; i < passes; ++i) {
+		glsl_bind(stencilUploadProgram_);
+		glUniform1f(u_stencilValue, i * scale);
+		// TODO: 4444, 5551
+		glstate.stencilFunc.set(GL_ALWAYS, i, 0xFF);
+		DrawActiveTexture(0, 0, 0, dstBuffer->width, dstBuffer->height, dstBuffer->width, dstBuffer->height, false, 0.0f, 0.0f, 1.0f, 1.0f, stencilUploadProgram_);
+	}
+
+	// Switch back to the current render target, or to no FBO if there is none.
+	if (currentRenderVfb_) {
+		RebindFramebuffer();
+	} else {
+		fbo_unbind();
+	}
+	glstate.viewport.restore();
+	return true;
+}
diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 679f65db58..cbea9ef634 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -250,6 +250,7 @@ + @@ -285,4 +286,4 @@ - + \ No newline at end of file diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index b420625616..e3ba8997c2 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -314,8 +314,11 @@ GLES + + GLES + - + \ No newline at end of file diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 9b29072c73..e8f6844514 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -165,6 +165,7 @@ enum GPUEventType { GPU_EVENT_SYNC_THREAD, GPU_EVENT_FB_MEMCPY, GPU_EVENT_FB_MEMSET, + GPU_EVENT_FB_STENCIL_UPLOAD, }; struct GPUEvent { @@ -189,6 +190,11 @@ struct GPUEvent { u8 v; int size; } fb_memset; + // GPU_EVENT_FB_STENCIL_UPLOAD + struct { + u32 dst; + int size; + } fb_stencil_upload; }; operator GPUEventType() const { @@ -244,6 +250,7 @@ public: virtual bool PerformMemoryCopy(u32 dest, u32 src, int
size) = 0; virtual bool PerformMemorySet(u32 dest, u8 v, int size) = 0; virtual bool PerformMemoryDownload(u32 dest, int size) = 0; + virtual bool PerformStencilUpload(u32 dest, int size) = 0; // Will cause the texture cache to be cleared at the start of the next frame. virtual void ClearCacheNextFrame() = 0; diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index d718cb4059..f427b909d0 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -674,3 +674,7 @@ bool NullGPU::PerformMemoryDownload(u32 dest, int size) { InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; } + +bool NullGPU::PerformStencilUpload(u32 dest, int size) { + return false; +} diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index 724403be5a..a83b256e1e 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -37,6 +37,7 @@ public: virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual bool PerformMemoryDownload(u32 dest, int size); + virtual bool PerformStencilUpload(u32 dest, int size); virtual void ClearCacheNextFrame() {}; virtual void DeviceLost() {} diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 20db506252..2d3e38c522 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -876,6 +876,11 @@ bool SoftGPU::PerformMemoryDownload(u32 dest, int size) return false; } +bool SoftGPU::PerformStencilUpload(u32 dest, int size) +{ + return false; +} + bool SoftGPU::FramebufferDirty() { if (g_Config.bSeparateCPUThread) { // Allow it to process fully before deciding if it's dirty. 
diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 8144f870ee..9ffe4509bd 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -62,6 +62,7 @@ public: virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual bool PerformMemoryDownload(u32 dest, int size); + virtual bool PerformStencilUpload(u32 dest, int size); virtual void ClearCacheNextFrame() {}; virtual void DeviceLost() {} diff --git a/Qt/Core.pro b/Qt/Core.pro index 5d0bd6befb..4300756fa7 100755 --- a/Qt/Core.pro +++ b/Qt/Core.pro @@ -54,6 +54,7 @@ SOURCES += $$P/Core/*.cpp \ # Core $$P/GPU/GLES/SoftwareTransform.cpp \ $$P/GPU/GLES/Spline.cpp \ $$P/GPU/GLES/StateMapping.cpp \ + $$P/GPU/GLES/StencilBuffer.cpp \ $$P/GPU/GLES/TextureCache.cpp \ $$P/GPU/GLES/TextureScaler.cpp \ $$P/GPU/GLES/TransformPipeline.cpp \ diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 8bd8a9abaa..f4b71ccf3f 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -146,6 +146,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/GLES/Framebuffer.cpp \ $(SRC)/GPU/GLES/DepalettizeShader.cpp \ $(SRC)/GPU/GLES/GLES_GPU.cpp.arm \ + $(SRC)/GPU/GLES/StencilBuffer.cpp.arm \ $(SRC)/GPU/GLES/TextureCache.cpp.arm \ $(SRC)/GPU/GLES/TransformPipeline.cpp.arm \ $(SRC)/GPU/GLES/SoftwareTransform.cpp.arm \