Implement a very basic stencil upload.

This is not fast or efficient, but works.
This commit is contained in:
Unknown W. Brackets 2014-05-31 18:24:35 -07:00
parent e477a765e1
commit 089ee41d9c
17 changed files with 209 additions and 5 deletions

View file

@ -1269,6 +1269,7 @@ add_library(GPU OBJECT
GPU/GLES/Spline.cpp
GPU/GLES/StateMapping.cpp
GPU/GLES/StateMapping.h
GPU/GLES/StencilBuffer.cpp
GPU/GLES/TextureCache.cpp
GPU/GLES/TextureCache.h
GPU/GLES/TextureScaler.cpp

View file

@ -1330,6 +1330,10 @@ bool DIRECTX9_GPU::PerformMemoryDownload(u32 dest, int size) {
return false;
}
// Stencil upload entry point for the DirectX 9 backend.
// This backend does nothing here: dest and size are ignored and the
// function always returns false.
bool DIRECTX9_GPU::PerformStencilUpload(u32 dest, int size) {
return false;
}
// Forwards the request to the texture cache via its ClearNextFrame().
void DIRECTX9_GPU::ClearCacheNextFrame() {
textureCache_.ClearNextFrame();
}

View file

@ -49,6 +49,7 @@ public:
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual bool PerformMemoryDownload(u32 dest, int size);
virtual bool PerformStencilUpload(u32 dest, int size);
virtual void ClearCacheNextFrame();
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.

View file

@ -101,7 +101,7 @@ enum {
FBO_OLD_AGE = 5,
};
static bool MaskedEqual(u32 addr1, u32 addr2) {
bool FramebufferManager::MaskedEqual(u32 addr1, u32 addr2) {
return (addr1 & 0x03FFFFFF) == (addr2 & 0x03FFFFFF);
}
@ -160,7 +160,7 @@ void CenterRect(float *x, float *y, float *w, float *h,
*h = outH;
}
static void ClearBuffer() {
void FramebufferManager::ClearBuffer() {
glstate.scissorTest.disable();
glstate.depthWrite.set(GL_TRUE);
glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
@ -175,7 +175,7 @@ static void ClearBuffer() {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
}
static void DisableState() {
void FramebufferManager::DisableState() {
glstate.blend.disable();
glstate.cullFace.disable();
glstate.depthTest.disable();
@ -322,6 +322,7 @@ FramebufferManager::FramebufferManager() :
convBuf_(0),
draw2dprogram_(0),
postShaderProgram_(0),
stencilUploadProgram_(0),
plainColorLoc_(-1),
timeLoc_(-1),
textureCache_(0),
@ -361,6 +362,9 @@ FramebufferManager::~FramebufferManager() {
if (draw2dprogram_) {
glsl_destroy(draw2dprogram_);
}
if (stencilUploadProgram_) {
glsl_destroy(stencilUploadProgram_);
}
SetNumExtraFBOs(0);
for (auto it = renderCopies_.begin(), end = renderCopies_.end(); it != end; ++it) {

View file

@ -213,6 +213,7 @@ public:
inline bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const;
bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false);
bool NotifyStencilUpload(u32 addr, int size);
void DestroyFramebuf(VirtualFramebuffer *vfb);
@ -231,6 +232,10 @@ private:
void SetNumExtraFBOs(int num);
static void DisableState();
static void ClearBuffer();
static bool MaskedEqual(u32 addr1, u32 addr2);
u32 displayFramebufPtr_;
u32 displayStride_;
GEBufferFormat displayFormat_;
@ -262,6 +267,7 @@ private:
GLSLProgram *draw2dprogram_;
GLSLProgram *plainColorProgram_;
GLSLProgram *postShaderProgram_;
GLSLProgram *stencilUploadProgram_;
int plainColorLoc_;
int timeLoc_;

View file

@ -672,6 +672,10 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size);
break;
case GPU_EVENT_FB_STENCIL_UPLOAD:
PerformStencilUploadInternal(ev.fb_stencil_upload.dst, ev.fb_stencil_upload.size);
break;
default:
GPUCommon::ProcessEvent(ev);
}
@ -2002,6 +2006,10 @@ void GLES_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) {
}
}
// Performs the actual stencil upload by handing dest/size to the framebuffer
// manager. Called either directly from PerformStencilUpload() or from
// ProcessEvent() when a GPU_EVENT_FB_STENCIL_UPLOAD event is handled.
// The bool result of NotifyStencilUpload() is intentionally not propagated.
void GLES_GPU::PerformStencilUploadInternal(u32 dest, int size) {
framebufferManager_.NotifyStencilUpload(dest, size);
}
bool GLES_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) {
// Track stray copies of a framebuffer in RAM. MotoGP does this.
if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) {
@ -2054,6 +2062,21 @@ bool GLES_GPU::PerformMemoryDownload(u32 dest, int size) {
return gpu->PerformMemoryCopy(dest ^ 0x00400000, dest, size);
}
// Requests a stencil upload for the memory at dest.
//
// Returns false without doing anything when dest cannot intersect a known
// framebuffer. Otherwise the upload is either queued as a
// GPU_EVENT_FB_STENCIL_UPLOAD event (when IsOnSeparateCPUThread() is true) or
// run immediately, and true is returned in both cases.
bool GLES_GPU::PerformStencilUpload(u32 dest, int size) {
	if (!framebufferManager_.MayIntersectFramebuffer(dest)) {
		return false;
	}

	if (!IsOnSeparateCPUThread()) {
		// No separate thread involved: do the work right away.
		PerformStencilUploadInternal(dest, size);
		return true;
	}

	// Defer the work by scheduling an event; ProcessEvent() picks it up.
	GPUEvent stencilEvent(GPU_EVENT_FB_STENCIL_UPLOAD);
	stencilEvent.fb_stencil_upload.dst = dest;
	stencilEvent.fb_stencil_upload.size = size;
	ScheduleEvent(stencilEvent);
	return true;
}
// Forwards the request to the texture cache via its ClearNextFrame().
void GLES_GPU::ClearCacheNextFrame() {
textureCache_.ClearNextFrame();
}

View file

@ -48,6 +48,7 @@ public:
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual bool PerformMemoryDownload(u32 dest, int size);
virtual bool PerformStencilUpload(u32 dest, int size);
virtual void ClearCacheNextFrame();
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
@ -158,6 +159,7 @@ private:
void CopyDisplayToOutputInternal();
void PerformMemoryCopyInternal(u32 dest, u32 src, int size);
void PerformMemorySetInternal(u32 dest, u8 v, int size);
void PerformStencilUploadInternal(u32 dest, int size);
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
static CommandInfo cmdInfo_[256];

139
GPU/GLES/StencilBuffer.cpp Normal file
View file

@ -0,0 +1,139 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "gfx_es2/glsl_program.h"
#include "gfx_es2/gl_state.h"
#include "Core/Reporting.h"
#include "GPU/GLES/Framebuffer.h"
// Fragment shader source used by the stencil upload passes.
// It samples `tex` at the interpolated texcoord, writes u_stencilValue to all
// four output channels, and discards the fragment unless u_stencilValue and
// the sampled alpha quantize (floor(x * 255.99)) to the same integer.
// The "#version 100" / precision preamble is only emitted for USING_GLES builds.
static const char *stencil_fs =
#ifdef USING_GLES
"#version 100\n"
"precision highp float;\n"
#endif
"varying vec2 v_texcoord0;\n"
"uniform float u_stencilValue;\n"
"uniform sampler2D tex;\n"
"float roundAndScaleTo255f(in float x) { return floor(x * 255.99); }\n"
"void main() {\n"
" vec4 index = texture2D(tex, v_texcoord0);\n"
" gl_FragColor = vec4(u_stencilValue);\n"
" if (roundAndScaleTo255f(u_stencilValue) != roundAndScaleTo255f(index.a)) discard;\n"
"}\n";
// Vertex shader source paired with stencil_fs.
// A plain pass-through: a_position is written to gl_Position unchanged and
// a_texcoord0 is forwarded to the fragment stage as v_texcoord0.
// The "#version 100" / precision preamble is only emitted for USING_GLES builds.
static const char *stencil_vs =
#ifdef USING_GLES
"#version 100\n"
"precision highp float;\n"
#endif
"attribute vec4 a_position;\n"
"attribute vec2 a_texcoord0;\n"
"varying vec2 v_texcoord0;\n"
"void main() {\n"
" v_texcoord0 = a_texcoord0;\n"
" gl_Position = a_position;\n"
"}\n";
// Returns true when addr1 and addr2 agree on every bit covered by the
// 0x03FFFFFF mask (i.e. their low 26 bits are identical); higher bits are
// ignored.
// NOTE(review): FramebufferManager declares a static MaskedEqual with the same
// body; unqualified calls made from FramebufferManager member functions will
// presumably resolve to that member, so confirm this file-local copy is
// still needed.
static bool MaskedEqual(u32 addr1, u32 addr2) {
	const u32 addressMask = 0x03FFFFFF;
	const u32 differingBits = (addr1 ^ addr2) & addressMask;
	return differingBits == 0;
}
// Uploads stencil data for the framebuffer located at addr.
//
// The memory at addr is turned into a texture (using the matching
// framebuffer's format, stride and dimensions) and drawn once per possible
// stencil value with stencil_vs/stencil_fs. Each pass keeps only the pixels
// whose alpha matches that pass's value, and GL_REPLACE writes the pass index
// into the stencil buffer for them. As the original TODO says, this is the
// slow way of doing it.
//
// Parameters:
//   addr - address of the pixel data; must match (under MaskedEqual) the
//          fb_address of a known virtual framebuffer.
//   size - currently unused by this implementation.
//
// Returns true when the upload was performed. Returns false when addr does
// not correspond to a known framebuffer, when the framebuffer format is
// GE_FORMAT_565, or when the upload shader could not be compiled.
bool FramebufferManager::NotifyStencilUpload(u32 addr, int size) {
	if (!MayIntersectFramebuffer(addr)) {
		return false;
	}

	// Note: no early break, so the last matching framebuffer in vfbs_ wins.
	VirtualFramebuffer *dstBuffer = 0;
	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		if (MaskedEqual(vfb->fb_address, addr)) {
			dstBuffer = vfb;
		}
	}
	if (!dstBuffer) {
		return false;
	}

	// Decide how many passes are needed BEFORE touching any GL state, so a
	// rejected request leaves shader, texture, blend and stencil state alone.
	// TODO: Doing it the slow way for now.
	int passes = 0;
	switch (dstBuffer->format) {
	case GE_FORMAT_565:
		// Well, this doesn't make much sense.
		return false;
	case GE_FORMAT_5551:
		passes = 2;
		break;
	case GE_FORMAT_4444:
		passes = 16;
		break;
	case GE_FORMAT_8888:
		passes = 256;
		break;
	}

	if (!stencilUploadProgram_) {
		// Lazily compile the upload shader on first use.
		std::string errorString;
		stencilUploadProgram_ = glsl_create_source(stencil_vs, stencil_fs, &errorString);
		if (!stencilUploadProgram_) {
			ERROR_LOG_REPORT(G3D, "Failed to compile stencilUploadProgram! This shouldn't happen.\n%s", errorString.c_str());
			// Bail out rather than handing a null program to
			// glsl_uniform_loc() and the draw calls below.
			return false;
		}
		glsl_bind(stencilUploadProgram_);
		// The sampler always reads from texture unit 0; only needs setting once.
		GLint u_tex = glsl_uniform_loc(stencilUploadProgram_, "tex");
		glUniform1i(u_tex, 0);
	} else {
		glsl_bind(stencilUploadProgram_);
	}

	// We bound our own program, so flag the shader as changed.
	gstate_c.shaderChanged = true;

	MakePixelTexture(Memory::GetPointer(addr), dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
	DisableState();
	// Blend factors (ZERO, ONE) for RGB keep the destination color untouched,
	// while (ONE, ZERO) for alpha replaces destination alpha with the output.
	glstate.blend.set(true);
	glstate.blendEquation.set(GL_FUNC_ADD);
	glstate.blendFuncSeparate.set(GL_ZERO, GL_ONE, GL_ONE, GL_ZERO);
	glstate.stencilTest.enable();
	glstate.stencilOp.set(GL_REPLACE, GL_REPLACE, GL_REPLACE);

	if (dstBuffer->fbo) {
		fbo_bind_as_render_target(dstBuffer->fbo);
	}
	glViewport(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);

	// Start from an all-zero stencil buffer.
	glClearStencil(0);
	glClear(GL_STENCIL_BUFFER_BIT);

	// Maps pass index 0..passes-1 onto the 0.0..1.0 range compared in the shader.
	const float scale = 1.0f / (passes - 1);
	GLint u_stencilValue = glsl_uniform_loc(stencilUploadProgram_, "u_stencilValue");
	for (int i = 0; i < passes; ++i) {
		glsl_bind(stencilUploadProgram_);
		glUniform1f(u_stencilValue, i * scale);
		// TODO: 4444, 5551
		glstate.stencilFunc.set(GL_ALWAYS, i, 0xFF);
		DrawActiveTexture(0, 0, 0, dstBuffer->width, dstBuffer->height, dstBuffer->width, dstBuffer->height, false, 0.0f, 0.0f, 1.0f, 1.0f, stencilUploadProgram_);
	}

	// Put back whatever render target was active before the upload.
	if (currentRenderVfb_) {
		RebindFramebuffer();
	} else {
		fbo_unbind();
	}
	glstate.viewport.restore();
	return true;
}

View file

@ -250,6 +250,7 @@
<ClCompile Include="GLES\ShaderManager.cpp" />
<ClCompile Include="GLES\Spline.cpp" />
<ClCompile Include="GLES\StateMapping.cpp" />
<ClCompile Include="GLES\StencilBuffer.cpp" />
<ClCompile Include="GLES\TextureCache.cpp" />
<ClCompile Include="GLES\TextureScaler.cpp" />
<ClCompile Include="GLES\SoftwareTransform.cpp" />
@ -285,4 +286,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -314,8 +314,11 @@
<ClCompile Include="GLES\DepalettizeShader.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\StencilBuffer.cpp">
<Filter>GLES</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>

View file

@ -165,6 +165,7 @@ enum GPUEventType {
GPU_EVENT_SYNC_THREAD,
GPU_EVENT_FB_MEMCPY,
GPU_EVENT_FB_MEMSET,
GPU_EVENT_FB_STENCIL_UPLOAD,
};
struct GPUEvent {
@ -189,6 +190,11 @@ struct GPUEvent {
u8 v;
int size;
} fb_memset;
// GPU_EVENT_FB_STENCIL_UPLOAD
// Payload for a deferred stencil upload; both fields are later passed on
// unchanged to the stencil upload handler when the event is processed.
struct {
// Destination address of the upload (the `dest` argument of the request).
u32 dst;
// Size argument of the request (units are not visible from here).
int size;
} fb_stencil_upload;
};
operator GPUEventType() const {
@ -244,6 +250,7 @@ public:
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size) = 0;
virtual bool PerformMemorySet(u32 dest, u8 v, int size) = 0;
virtual bool PerformMemoryDownload(u32 dest, int size) = 0;
virtual bool PerformStencilUpload(u32 dest, int size) = 0;
// Will cause the texture cache to be cleared at the start of the next frame.
virtual void ClearCacheNextFrame() = 0;

View file

@ -674,3 +674,7 @@ bool NullGPU::PerformMemoryDownload(u32 dest, int size) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
return false;
}
// Stencil upload entry point for the null GPU backend.
// Nothing is done: dest and size are ignored and the function always
// returns false.
bool NullGPU::PerformStencilUpload(u32 dest, int size) {
return false;
}

View file

@ -37,6 +37,7 @@ public:
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual bool PerformMemoryDownload(u32 dest, int size);
virtual bool PerformStencilUpload(u32 dest, int size);
virtual void ClearCacheNextFrame() {};
virtual void DeviceLost() {}

View file

@ -876,6 +876,11 @@ bool SoftGPU::PerformMemoryDownload(u32 dest, int size)
return false;
}
// Stencil upload entry point for the software GPU backend.
// Nothing is done: dest and size are ignored and the function always
// returns false.
bool SoftGPU::PerformStencilUpload(u32 dest, int size)
{
return false;
}
bool SoftGPU::FramebufferDirty() {
if (g_Config.bSeparateCPUThread) {
// Allow it to process fully before deciding if it's dirty.

View file

@ -62,6 +62,7 @@ public:
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual bool PerformMemoryDownload(u32 dest, int size);
virtual bool PerformStencilUpload(u32 dest, int size);
virtual void ClearCacheNextFrame() {};
virtual void DeviceLost() {}

View file

@ -54,6 +54,7 @@ SOURCES += $$P/Core/*.cpp \ # Core
$$P/GPU/GLES/SoftwareTransform.cpp \
$$P/GPU/GLES/Spline.cpp \
$$P/GPU/GLES/StateMapping.cpp \
$$P/GPU/GLES/StencilBuffer.cpp \
$$P/GPU/GLES/TextureCache.cpp \
$$P/GPU/GLES/TextureScaler.cpp \
$$P/GPU/GLES/TransformPipeline.cpp \

View file

@ -146,6 +146,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/GPU/GLES/Framebuffer.cpp \
$(SRC)/GPU/GLES/DepalettizeShader.cpp \
$(SRC)/GPU/GLES/GLES_GPU.cpp.arm \
$(SRC)/GPU/GLES/StencilBuffer.cpp.arm \
$(SRC)/GPU/GLES/TextureCache.cpp.arm \
$(SRC)/GPU/GLES/TransformPipeline.cpp.arm \
$(SRC)/GPU/GLES/SoftwareTransform.cpp.arm \