Defer blend, cull and depth test changes to draw command

Avoids unnecessary state changes and reduces the number of OpenGL calls.
It also puts all the interesting logic in the same place, reducing the
complexity a little.
This commit is contained in:
Florent Castelli 2012-11-24 15:19:29 +01:00
parent 364061a45c
commit 56175bc505
15 changed files with 153 additions and 109 deletions

View file

@ -5,6 +5,7 @@ set(SRCS
GLES/FragmentShaderGenerator.cpp
GLES/Framebuffer.cpp
GLES/ShaderManager.cpp
GLES/StateMapping.cpp
GLES/TextureCache.cpp
GLES/TransformPipeline.cpp
GLES/VertexDecoder.cpp

View file

@ -32,6 +32,7 @@
#include "../../Core/Host.h"
#include "../../Core/Config.h"
#include "../../Core/System.h"
#include "../../native/gfx_es2/gl_state.h"
#include "../GPUState.h"
#include "../ge_constants.h"
@ -136,9 +137,9 @@ void GLES_GPU::CopyDisplayToOutput()
}
DEBUG_LOG(HLE, "Displaying FBO %08x", vfb->fb_address);
glDisable(GL_BLEND);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glstate.blend.disable();
glstate.cullFace.disable();
glstate.depthTest.disable();
fbo_bind_color_as_texture(vfb->fbo, 0);
@ -148,11 +149,6 @@ void GLES_GPU::CopyDisplayToOutput()
shaderManager.DirtyShader();
shaderManager.DirtyUniform(DIRTY_ALL);
gstate_c.textureChanged = true;
// Restore some state
ExecuteOp(gstate.cmdmem[GE_CMD_ALPHABLENDENABLE], 0xFFFFFFFF);
ExecuteOp(gstate.cmdmem[GE_CMD_CULLFACEENABLE], 0xFFFFFFFF);
ExecuteOp(gstate.cmdmem[GE_CMD_ZTESTENABLE], 0xFFFFFFFF);
}
GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO()
@ -327,11 +323,6 @@ void EnterClearMode(u32 data)
bool updateZ = (data >> 10) & 1;
glColorMask(colMask, colMask, colMask, alphaMask);
glDepthMask(updateZ); // Update Z or not
// Note that depth test must be enabled for depth writes to go through! So we use GL_ALWAYS
glDisable(GL_BLEND);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_ALWAYS);
glDisable(GL_CULL_FACE); // ??
}
void LeaveClearMode()
@ -344,66 +335,9 @@ void LeaveClearMode()
// Alpha test
glDepthMask(1);
glColorMask(1,1,1,1);
glEnDis(GL_DEPTH_TEST, gstate.zTestEnable & 1);
glDepthFunc(GL_LEQUAL); // TODO
glEnDis(GL_BLEND, gstate.alphaBlendEnable & 1);
// dirtyshader?
}
// Decodes a PSP blend-mode register value and applies it through
// glBlendFunc() / glBlendEquation().
//
// data: bits 0-3 select the first glBlendFunc factor, bits 4-7 the second
//       factor and bits 8-10 the blend equation (see the masks below).
//
// The bit masks admit more values than the lookup tables have entries, so
// every index is range-checked before use instead of reading past the end
// of a table.
void SetBlendModePSP(u32 data)
{
	// This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop.
	// HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly.
	const GLint aLookup[] = {
		GL_DST_COLOR,
		GL_ONE_MINUS_DST_COLOR,
		GL_SRC_ALPHA,
		GL_ONE_MINUS_SRC_ALPHA,
		GL_DST_ALPHA,
		GL_ONE_MINUS_DST_ALPHA,
		GL_SRC_ALPHA, // should be 2x
		GL_ONE_MINUS_SRC_ALPHA, // should be 2x
		GL_DST_ALPHA, // should be 2x
		GL_ONE_MINUS_DST_ALPHA, // should be 2x - and COLOR?
		GL_ONE, // should be FIXA
	};
	const GLint bLookup[] = {
		GL_SRC_COLOR,
		GL_ONE_MINUS_SRC_COLOR,
		GL_SRC_ALPHA,
		GL_ONE_MINUS_SRC_ALPHA,
		GL_DST_ALPHA,
		GL_ONE_MINUS_DST_ALPHA,
		GL_SRC_ALPHA, // should be 2x
		GL_ONE_MINUS_SRC_ALPHA, // should be 2x
		GL_DST_ALPHA, // should be 2x
		GL_ONE_MINUS_DST_ALPHA, // should be 2x
		GL_ONE, // should be FIXB
	};
	const GLint eqLookup[] = {
		GL_FUNC_ADD,
		GL_FUNC_SUBTRACT,
		GL_FUNC_REVERSE_SUBTRACT,
#if defined(ANDROID) || defined(BLACKBERRY)
		GL_FUNC_ADD,
		GL_FUNC_ADD,
		GL_FUNC_ADD, // should be abs(diff)
#else
		GL_MIN,
		GL_MAX,
		GL_MAX, // should be abs(diff)
#endif
	};

	const unsigned int aCount = sizeof(aLookup) / sizeof(aLookup[0]);
	const unsigned int bCount = sizeof(bLookup) / sizeof(bLookup[0]);
	const unsigned int eqCount = sizeof(eqLookup) / sizeof(eqLookup[0]);

	unsigned int a = data & 0xF;
	unsigned int b = (data >> 4) & 0xF;
	unsigned int eq = (data >> 8) & 0x7;

	// Factor values past the end of the tables reuse the last entry
	// (the FIXA / FIXB placeholder).
	if (a >= aCount)
		a = aCount - 1;
	if (b >= bCount)
		b = bCount - 1;
	// Equation values past the end of the table fall back to plain addition.
	if (eq >= eqCount)
		eq = 0;

	glBlendFunc(aLookup[a], bLookup[b]);
	glBlendEquation(eqLookup[eq]);
}
void GLES_GPU::ExecuteOp(u32 op, u32 diff)
{
u32 cmd = op >> 24;
@ -533,13 +467,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
// Throughmode changed, let's make the proj matrix dirty.
shaderManager.DirtyUniform(DIRTY_PROJMATRIX);
}
if (data & GE_VTYPE_THROUGH) {
glDisable(GL_CULL_FACE);
}
else {
// this might get spammy
glEnDis(GL_CULL_FACE, gstate.cullfaceEnable & 1);
}
// This sets through-mode or not, as well.
break;
@ -597,7 +524,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
case GE_CMD_CULLFACEENABLE:
DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data);
glEnDis(GL_CULL_FACE, data&1);
break;
case GE_CMD_TEXTUREMAPENABLE:
@ -938,8 +864,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
break;
case GE_CMD_CULL:
DEBUG_LOG(G3D,"DL cull: %06x", data);
glCullFace(data & 1 ? GL_FRONT : GL_BACK);
//glCullFace(data & 1 ? GL_BACK : GL_FRONT);
break;
case GE_CMD_LMODE:
@ -975,12 +899,10 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
//////////////////////////////////////////////////////////////////
case GE_CMD_ALPHABLENDENABLE:
DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data);
glEnDis(GL_BLEND, data);
break;
case GE_CMD_BLENDMODE:
DEBUG_LOG(G3D,"DL Blend mode: %06x", data);
SetBlendModePSP(data);
break;
case GE_CMD_BLENDFIXEDA:
@ -1041,7 +963,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
//////////////////////////////////////////////////////////////////
case GE_CMD_ZTESTENABLE:
glEnDis(GL_DEPTH_TEST, data & 1);
DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1);
break;
@ -1051,13 +972,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
case GE_CMD_ZTEST:
{
static const GLuint ztests[8] =
{
GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL,
GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL
};
//glDepthFunc(ztests[data&7]);
glDepthFunc(GL_LEQUAL);
DEBUG_LOG(G3D,"DL Z test mode: %i", data);
}
break;

View file

@ -28,6 +28,7 @@
#endif
#include "gfx_es2/glsl_program.h"
#include "gfx_es2/gl_state.h"
#include "math/lin/matrix4x4.h"
#include "../../Core/Host.h"
@ -80,9 +81,9 @@ void DisplayDrawer_Init()
glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST);
#endif
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_BLEND);
glstate.cullFace.disable();
glstate.depthTest.disable();
glstate.blend.disable();
//glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
glEnable(GL_TEXTURE_2D);

53
GPU/GLES/StateMapping.cpp Normal file
View file

@ -0,0 +1,53 @@
#include "StateMapping.h"
// GL blend factor used as the first argument of glstate.blendFunc.set(),
// indexed by gstate.getBlendFuncA(). That accessor returns (blend & 0xF), so
// any value in 0..15 can arrive here; the table therefore provides all 16
// entries so a lookup can never read past its end.
const GLint aLookup[16] = {
	GL_DST_COLOR,
	GL_ONE_MINUS_DST_COLOR,
	GL_SRC_ALPHA,
	GL_ONE_MINUS_SRC_ALPHA,
	GL_DST_ALPHA,
	GL_ONE_MINUS_DST_ALPHA,
	GL_SRC_ALPHA, // should be 2x
	GL_ONE_MINUS_SRC_ALPHA, // should be 2x
	GL_DST_ALPHA, // should be 2x
	GL_ONE_MINUS_DST_ALPHA, // should be 2x - and COLOR?
	GL_SRC_ALPHA, // should be FIXA
	// 11-15 had no entry before; they reuse the FIXA placeholder.
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
};
// GL blend factor used as the second argument of glstate.blendFunc.set(),
// indexed by gstate.getBlendFuncB(). That accessor returns
// ((blend >> 4) & 0xF), so any value in 0..15 can arrive here; the table
// therefore provides all 16 entries so a lookup can never read past its end.
const GLint bLookup[16] = {
	GL_SRC_COLOR,
	GL_ONE_MINUS_SRC_COLOR,
	GL_SRC_ALPHA,
	GL_ONE_MINUS_SRC_ALPHA,
	GL_DST_ALPHA,
	GL_ONE_MINUS_DST_ALPHA,
	GL_SRC_ALPHA, // should be 2x
	GL_ONE_MINUS_SRC_ALPHA, // should be 2x
	GL_DST_ALPHA, // should be 2x
	GL_ONE_MINUS_DST_ALPHA, // should be 2x
	GL_SRC_ALPHA, // should be FIXB
	// 11-15 had no entry before; they reuse the FIXB placeholder.
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
	GL_SRC_ALPHA,
};
// GL blend equation passed to glstate.blendEquation.set(), indexed by
// gstate.getBlendEq(). That accessor returns ((blend >> 8) & 0x7), so any
// value in 0..7 can arrive here; the table therefore provides all 8 entries
// so a lookup can never read past its end.
const GLint eqLookup[8] = {
	GL_FUNC_ADD,
	GL_FUNC_SUBTRACT,
	GL_FUNC_REVERSE_SUBTRACT,
#if defined(ANDROID) || defined(BLACKBERRY)
	// On these platforms the min/max slots are filled with GL_FUNC_ADD.
	GL_FUNC_ADD,
	GL_FUNC_ADD,
#else
	GL_MIN,
	GL_MAX,
#endif
	GL_FUNC_ADD, // should be abs(diff)
	// 6-7 had no entry before; fall back to plain addition.
	GL_FUNC_ADD,
	GL_FUNC_ADD,
};
// Face handed to glstate.cullFaceMode.set(), indexed by gstate.getCullMode()
// (a single bit, so exactly two entries are needed).
const GLint cullingMode[2] = { GL_BACK, GL_FRONT };
// Depth comparison handed to glstate.depthFunc.set(), indexed by the 3-bit
// value from gstate.getDepthTestFunc().
const GLuint ztests[8] = {
	GL_NEVER,    // 0
	GL_ALWAYS,   // 1 - the draw path also forces this index in clear mode
	GL_EQUAL,    // 2
	GL_NOTEQUAL, // 3
	GL_LESS,     // 4
	GL_LEQUAL,   // 5
	GL_GREATER,  // 6
	GL_GEQUAL,   // 7
};

17
GPU/GLES/StateMapping.h Normal file
View file

@ -0,0 +1,17 @@
// Lookup tables that translate PSP GE state register values into OpenGL
// enums. The tables are defined in StateMapping.cpp.
#pragma once

// Pull in the GL type and enum definitions for the current platform.
#if defined(ANDROID) || defined(BLACKBERRY)
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>
#else
#include <GL/glew.h>
#if defined(__APPLE__)
#include <OpenGL/gl.h>
#else
#include <GL/gl.h>
#endif
#endif

// First blend factor, indexed by GPUgstate::getBlendFuncA().
extern const GLint aLookup[];
// Second blend factor, indexed by GPUgstate::getBlendFuncB().
extern const GLint bLookup[];
// Blend equation, indexed by GPUgstate::getBlendEq().
extern const GLint eqLookup[];
// Face to cull, indexed by GPUgstate::getCullMode().
extern const GLint cullingMode[];
// Depth comparison function, indexed by GPUgstate::getDepthTestFunc().
extern const GLuint ztests[];

View file

@ -30,11 +30,13 @@
#include "../../Core/MemMap.h"
#include "../../Core/Host.h"
#include "../../Core/System.h"
#include "../../native/gfx_es2/gl_state.h"
#include "../Math3D.h"
#include "../GPUState.h"
#include "../ge_constants.h"
#include "StateMapping.h"
#include "TextureCache.h"
#include "TransformPipeline.h"
#include "VertexDecoder.h"
@ -218,7 +220,7 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li
// Check if anything needs updating
if (gstate_c.textureChanged)
{
if ((gstate.textureMapEnable & 1) && !(gstate.clearmode & 1))
if ((gstate.textureMapEnable & 1) && !gstate.isModeClear())
{
PSPSetTexture();
useTexCoord = true;
@ -524,6 +526,37 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li
}
}
// Set cull
bool wantCull = !gstate.isModeClear() && !gstate.isModeThrough() && gstate.isCullEnabled();
glstate.cullFace.set(wantCull);
if(wantCull) {
u8 cullMode = gstate.getCullMode();
glstate.cullFaceMode.set(cullingMode[cullMode]);
}
// Set blend
bool wantBlend = !gstate.isModeClear() && (gstate.alphaBlendEnable & 1);
glstate.blend.set(wantBlend);
if(wantBlend) {
// This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop.
// HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly.
u8 blendFuncA = gstate.getBlendFuncA();
u8 blendFuncB = gstate.getBlendFuncB();
u8 blendFuncEq = gstate.getBlendEq();
glstate.blendFunc.set(aLookup[blendFuncA], bLookup[blendFuncB]);
glstate.blendEquation.set(eqLookup[blendFuncEq]);
}
bool wantDepthTest = gstate.isModeClear() || gstate.isDepthTestEnabled();
glstate.depthTest.set(wantDepthTest);
if(wantDepthTest) {
// Force GL_ALWAYS if mode clear
u8 depthTestFunc = gstate.isModeClear() ? 1 : gstate.getDepthTestFunc();
glstate.depthFunc.set(ztests[depthTestFunc]);
}
glEnableVertexAttribArray(program->a_position);
if (useTexCoord && program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord);
if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0);

View file

@ -118,6 +118,7 @@
<ClInclude Include="GLES\FragmentShaderGenerator.h" />
<ClInclude Include="GLES\Framebuffer.h" />
<ClInclude Include="GLES\ShaderManager.h" />
<ClInclude Include="GLES\StateMapping.h" />
<ClInclude Include="GLES\TextureCache.h" />
<ClInclude Include="GLES\TransformPipeline.h" />
<ClInclude Include="GLES\VertexDecoder.h" />
@ -132,6 +133,7 @@
<ClCompile Include="GLES\FragmentShaderGenerator.cpp" />
<ClCompile Include="GLES\Framebuffer.cpp" />
<ClCompile Include="GLES\ShaderManager.cpp" />
<ClCompile Include="GLES\StateMapping.cpp" />
<ClCompile Include="GLES\TextureCache.cpp" />
<ClCompile Include="GLES\TransformPipeline.cpp" />
<ClCompile Include="GLES\VertexDecoder.cpp" />

View file

@ -54,6 +54,9 @@
<ClInclude Include="Null\NullGpu.h">
<Filter>Null</Filter>
</ClInclude>
<ClInclude Include="GLES\StateMapping.h">
<Filter>GLES</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -89,6 +92,9 @@
<ClCompile Include="Null\NullGpu.cpp">
<Filter>Null</Filter>
</ClCompile>
<ClCompile Include="GLES\StateMapping.cpp">
<Filter>GLES</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />

View file

@ -46,6 +46,7 @@ void InitGfxState()
for (int i = 0; i < 256; i++) {
gstate.cmdmem[i] = i << 24;
}
gstate.lightingEnable = 0x17000001;
static const float identity4x3[12] =
@ -103,6 +104,7 @@ void ReapplyGfxState()
gpu->ExecuteOp(gstate.cmdmem[GE_CMD_SCISSOR1], 0xFFFFFFFF);
gpu->ExecuteOp(gstate.cmdmem[GE_CMD_SCISSOR2], 0xFFFFFFFF);
*/
for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++)
{
gpu->ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);

View file

@ -18,6 +18,7 @@
#pragma once
#include "../Globals.h"
#include "ge_constants.h"
#include <cstring>
// TODO: this doesn't belong here
@ -237,6 +238,16 @@ struct GPUgstate
float projMatrix[16];
float tgenMatrix[12];
float boneMatrix[8*12];
// Reports whether the GE_VTYPE_THROUGH bit of vertType is set.
inline bool isModeThrough() const
{
	return (vertType & GE_VTYPE_THROUGH) != 0;
}
// Reports whether bit 0 of clearmode is set.
inline bool isModeClear() const
{
	return (clearmode & 1) != 0;
}
// Reports whether bit 0 of cullfaceEnable is set.
inline bool isCullEnabled() const
{
	return (cullfaceEnable & 1) != 0;
}
// Returns bit 0 of cullmode (0 or 1).
inline u8 getCullMode() const
{
	return static_cast<u8>(cullmode & 1);
}
// Returns bits 0-3 of blend (0..15).
inline u8 getBlendFuncA() const
{
	return static_cast<u8>(blend & 0xF);
}
// Returns bits 4-7 of blend, shifted down (0..15).
inline u8 getBlendFuncB() const
{
	return static_cast<u8>((blend >> 4) & 0xF);
}
// Returns bits 8-10 of blend, shifted down (0..7).
inline u8 getBlendEq() const
{
	return static_cast<u8>((blend >> 8) & 0x7);
}
// Reports whether bit 0 of zTestEnable is set.
inline bool isDepthTestEnabled() const
{
	return (zTestEnable & 1) != 0;
}
// Returns bits 0-2 of ztestfunc (0..7).
inline u8 getDepthTestFunc() const
{
	return static_cast<u8>(ztestfunc & 0x7);
}
};
// Real data in the context ends here
@ -286,8 +297,10 @@ struct GPUStatistics
// Total statistics
int numFrames;
};
void InitGfxState();
void ShutdownGfxState();
void ReapplyGfxState();
@ -314,4 +327,4 @@ class GPUInterface;
extern GPUgstate gstate;
extern GPUStateCache gstate_c;
extern GPUInterface *gpu;
extern GPUStatistics gpuStats;
extern GPUStatistics gpuStats;

View file

@ -713,7 +713,6 @@ void NullGPU::ExecuteOp(u32 op, u32 diff)
case GE_CMD_ZTEST:
{
//glDepthFunc(ztests[data&7]);
DEBUG_LOG(G3D,"DL Z test mode: %i", data);
}
break;

View file

@ -14,6 +14,7 @@
#include <windows.h>
#include <GL/gl.h> // Header File For The OpenGL32 Library
#include "../native/gfx_es2/gl_state.h"
#include "OpenGLBase.h"
@ -138,6 +139,7 @@ bool GL_Init(HWND window)
setVSync(0);
glewInit();
glstate.Initialize();
GL_Resized(); // Set Up Our Perspective GL Screen

View file

@ -1,8 +1,8 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PPSSPPWindows", "PPSSPP.vcxproj", "{567AF8DB-42C1-4D08-96CD-D70A2DFEFC6B}"
ProjectSection(ProjectDependencies) = postProject
{E8B58922-9827-493D-81E0-4B6E6BD77171} = {E8B58922-9827-493D-81E0-4B6E6BD77171}
{C4DF647E-80EA-4111-A0A8-218B1B711E18} = {C4DF647E-80EA-4111-A0A8-218B1B711E18}
{F761046E-6C38-4428-A5F1-38391A37BB34} = {F761046E-6C38-4428-A5F1-38391A37BB34}
{457F45D2-556F-47BC-A31D-AFF0D15BEAED} = {457F45D2-556F-47BC-A31D-AFF0D15BEAED}
{3FCDBAE2-5103-4350-9A8E-848CE9C73195} = {3FCDBAE2-5103-4350-9A8E-848CE9C73195}
@ -19,13 +19,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GPU", "..\GPU\GPU.vcxproj",
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Core", "..\Core\Core.vcxproj", "{533F1D30-D04D-47CC-AD71-20F658907E36}"
ProjectSection(ProjectDependencies) = postProject
{E8B58922-9827-493D-81E0-4B6E6BD77171} = {E8B58922-9827-493D-81E0-4B6E6BD77171}
{C4DF647E-80EA-4111-A0A8-218B1B711E18} = {C4DF647E-80EA-4111-A0A8-218B1B711E18}
{F761046E-6C38-4428-A5F1-38391A37BB34} = {F761046E-6C38-4428-A5F1-38391A37BB34}
{457F45D2-556F-47BC-A31D-AFF0D15BEAED} = {457F45D2-556F-47BC-A31D-AFF0D15BEAED}
{3FCDBAE2-5103-4350-9A8E-848CE9C73195} = {3FCDBAE2-5103-4350-9A8E-848CE9C73195}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "native", "..\native\native.vcxproj", "{E8B58922-9827-493D-81E0-4B6E6BD77171}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "native", "..\native\native.vcxproj", "{C4DF647E-80EA-4111-A0A8-218B1B711E18}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PPSSPPHeadless", "..\headless\Headless.vcxproj", "{EE9BD869-CAA3-447D-8328-294D90DE2C1F}"
EndProject
@ -79,14 +79,14 @@ Global
{533F1D30-D04D-47CC-AD71-20F658907E36}.Release|Win32.Build.0 = Release|Win32
{533F1D30-D04D-47CC-AD71-20F658907E36}.Release|x64.ActiveCfg = Release|x64
{533F1D30-D04D-47CC-AD71-20F658907E36}.Release|x64.Build.0 = Release|x64
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Debug|Win32.ActiveCfg = Debug|Win32
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Debug|Win32.Build.0 = Debug|Win32
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Debug|x64.ActiveCfg = Debug|x64
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Debug|x64.Build.0 = Debug|x64
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Release|Win32.ActiveCfg = Release|Win32
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Release|Win32.Build.0 = Release|Win32
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Release|x64.ActiveCfg = Release|x64
{E8B58922-9827-493D-81E0-4B6E6BD77171}.Release|x64.Build.0 = Release|x64
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Debug|Win32.ActiveCfg = Debug|Win32
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Debug|Win32.Build.0 = Debug|Win32
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Debug|x64.ActiveCfg = Debug|x64
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Debug|x64.Build.0 = Debug|x64
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Release|Win32.ActiveCfg = Release|Win32
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Release|Win32.Build.0 = Release|Win32
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Release|x64.ActiveCfg = Release|x64
{C4DF647E-80EA-4111-A0A8-218B1B711E18}.Release|x64.Build.0 = Release|x64
{EE9BD869-CAA3-447D-8328-294D90DE2C1F}.Debug|Win32.ActiveCfg = Debug|Win32
{EE9BD869-CAA3-447D-8328-294D90DE2C1F}.Debug|Win32.Build.0 = Debug|Win32
{EE9BD869-CAA3-447D-8328-294D90DE2C1F}.Debug|x64.ActiveCfg = Debug|x64

View file

@ -72,6 +72,7 @@ LOCAL_SRC_FILES := \
$(SRC)/GPU/GLES/DisplayListInterpreter.cpp \
$(SRC)/GPU/GLES/TextureCache.cpp \
$(SRC)/GPU/GLES/TransformPipeline.cpp \
$(SRC)/GPU/GLES/StateMapping.cpp \
$(SRC)/GPU/GLES/VertexDecoder.cpp \
$(SRC)/GPU/GLES/ShaderManager.cpp \
$(SRC)/GPU/GLES/VertexShaderGenerator.cpp \

2
native

@ -1 +1 @@
Subproject commit 84f741b21dbaa5cc5f67a9399549648df86cd886
Subproject commit 8900d99779d836cf4a016809277003518d0b2a7c