Begin implementing GLQueueRunner and GLRenderManager

2025-04-02 11:01:50 -04:00 · 2017-11-18 15:42:39 +01:00 · 2017-11-18 15:42:39 +01:00 · dd91cb0f8e
commit dd91cb0f8e
parent 2ae2dd7d40
7 changed files with 1120 additions and 0 deletions
--- a/ext/native/native.vcxproj
+++ b/ext/native/native.vcxproj
@ -241,6 +241,8 @@
    <ClInclude Include="gfx_es2\draw_text_win.h" />
    <ClInclude Include="thin3d\d3d11_loader.h" />
    <ClInclude Include="thin3d\DataFormat.h" />
+    <ClInclude Include="thin3d\GLQueueRunner.h" />
+    <ClInclude Include="thin3d\GLRenderManager.h" />
    <ClInclude Include="thin3d\VulkanQueueRunner.h" />
    <ClInclude Include="thin3d\VulkanRenderManager.h" />
    <ClInclude Include="util\text\wrap_text.h" />
@ -699,6 +701,8 @@
    <ClCompile Include="gfx_es2\draw_text_win.cpp" />
    <ClCompile Include="math\dataconv.cpp" />
    <ClCompile Include="thin3d\d3d11_loader.cpp" />
+    <ClCompile Include="thin3d\GLQueueRunner.cpp" />
+    <ClCompile Include="thin3d\GLRenderManager.cpp" />
    <ClCompile Include="thin3d\thin3d_d3d11.cpp" />
    <ClCompile Include="thin3d\VulkanQueueRunner.cpp" />
    <ClCompile Include="thin3d\VulkanRenderManager.cpp" />
--- a/ext/native/native.vcxproj.filters
+++ b/ext/native/native.vcxproj.filters
@ -332,6 +332,12 @@
    <ClInclude Include="..\..\Qt\QtMain.h">
      <Filter>base</Filter>
    </ClInclude>
+    <ClInclude Include="thin3d\GLRenderManager.h">
+      <Filter>thin3d</Filter>
+    </ClInclude>
+    <ClInclude Include="thin3d\GLQueueRunner.h">
+      <Filter>thin3d</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="gfx\gl_debug_log.cpp">
@ -799,6 +805,12 @@
    <ClCompile Include="ui\ui_tween.cpp">
      <Filter>ui</Filter>
    </ClCompile>
+    <ClCompile Include="thin3d\GLQueueRunner.cpp">
+      <Filter>thin3d</Filter>
+    </ClCompile>
+    <ClCompile Include="thin3d\GLRenderManager.cpp">
+      <Filter>thin3d</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <Filter Include="gfx">
--- a/ext/native/thin3d/GLQueueRunner.cpp
+++ b/ext/native/thin3d/GLQueueRunner.cpp
@ -0,0 +1,217 @@
+#include "GLQueueRunner.h"
+#include "GLRenderManager.h"
+#include "gfx_es2/gpu_features.h"
+#include "math/dataconv.h"
+
+// Stub: currently creates nothing.
+void GLQueueRunner::CreateDeviceObjects() {
+
+}
+
+// Stub: currently destroys nothing.
+void GLQueueRunner::DestroyDeviceObjects() {
+
+}
+
+// Stub: the queued init steps (resource creation etc.) are not executed yet.
+void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps) {
+
+}
+
+// Runs every queued step in order, dispatching on its type, and frees each step
+// once it has been executed. The pointers left behind in `steps` are dangling
+// afterwards, so the caller is expected to clear the vector.
+void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps) {
+	for (GLRStep *queued : steps) {
+		const GLRStep &step = *queued;
+		switch (step.stepType) {
+		case GLRStepType::RENDER:
+			PerformRenderPass(step);
+			break;
+		case GLRStepType::COPY:
+			PerformCopy(step);
+			break;
+		case GLRStepType::BLIT:
+			PerformBlit(step);
+			break;
+		case GLRStepType::READBACK:
+			PerformReadback(step);
+			break;
+		case GLRStepType::READBACK_IMAGE:
+			PerformReadbackImage(step);
+			break;
+		}
+		delete queued;
+	}
+}
+
+// Stub: logging of queued steps is not implemented yet.
+void GLQueueRunner::LogSteps(const std::vector<GLRStep *> &steps) {
+
+}
+
+
+// Stub: BLIT steps are accepted but currently do nothing.
+void GLQueueRunner::PerformBlit(const GLRStep &step) {
+}
+
+// Executes one RENDER step: binds the step's render target, then replays the
+// recorded state-setting and draw commands as direct GL calls, in recording order.
+//
+// NOTE(review): the CLEAR command only programs the clear values; no glClear is
+// issued anywhere in this function yet.
+// NOTE(review): TEXTURESAMPLER and PUSH_CONSTANTS have no case below and are ignored.
+void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
+	// Don't execute empty renderpasses.
+	if (step.commands.empty() && step.render.color == GLRRenderPassAction::KEEP && step.render.depthStencil == GLRRenderPassAction::KEEP) {
+		// Nothing to do.
+		return;
+	}
+
+	// Select the step's framebuffer as the render target (the callee is still a stub).
+	PerformBindFramebufferAsRenderTarget(step);
+
+	// Tracks the currently active texture unit so glActiveTexture is only called on change.
+	GLint activeTexture = GL_TEXTURE0;
+
+	for (const auto &c : step.commands) {
+		switch (c.cmd) {
+		case GLRRenderCommand::DEPTH:
+			if (c.depth.enabled) {
+				glEnable(GL_DEPTH_TEST);
+				glDepthMask(c.depth.write);
+				glDepthFunc(c.depth.func);
+			} else {
+				glDisable(GL_DEPTH_TEST);
+			}
+			break;
+		case GLRRenderCommand::BLEND:
+			if (c.blend.enabled) {
+				glEnable(GL_BLEND);
+				glBlendEquationSeparate(c.blend.funcColor, c.blend.funcAlpha);
+				glBlendFuncSeparate(c.blend.srcColor, c.blend.dstColor, c.blend.srcAlpha, c.blend.dstAlpha);
+			} else {
+				glDisable(GL_BLEND);
+			}
+			break;
+		case GLRRenderCommand::CLEAR:
+			// Only the clear values are set here, one per requested aspect.
+			if (c.clear.clearMask & GLR_ASPECT_COLOR) {
+				float color[4];
+				Uint8x4ToFloat4(color, c.clear.clearColor);
+				glClearColor(color[0], color[1], color[2], color[3]);
+			}
+			if (c.clear.clearMask & GLR_ASPECT_DEPTH) {
+				glClearDepth(c.clear.clearZ);
+			}
+			if (c.clear.clearMask & GLR_ASPECT_STENCIL) {
+				glClearStencil(c.clear.clearStencil);
+			}
+			break;
+		case GLRRenderCommand::BLENDCOLOR:
+			glBlendColor(c.blendColor.color[0], c.blendColor.color[1], c.blendColor.color[2], c.blendColor.color[3]);
+			break;
+		case GLRRenderCommand::VIEWPORT:
+			// TODO: Support FP viewports through glViewportArrays
+			glViewport((GLint)c.viewport.vp.x, (GLint)c.viewport.vp.y, (GLsizei)c.viewport.vp.w, (GLsizei)c.viewport.vp.h);
+			glDepthRange(c.viewport.vp.minZ, c.viewport.vp.maxZ);
+			break;
+		case GLRRenderCommand::SCISSOR:
+			glScissor(c.scissor.rc.x, c.scissor.rc.y, c.scissor.rc.w, c.scissor.rc.h);
+			break;
+		case GLRRenderCommand::UNIFORM4F:
+			// `count` selects how many components of v[] are uploaded; other counts are ignored.
+			switch (c.uniform4.count) {
+			case 1:
+				glUniform1f(c.uniform4.loc, c.uniform4.v[0]);
+				break;
+			case 2:
+				glUniform2fv(c.uniform4.loc, 1, c.uniform4.v);
+				break;
+			case 3:
+				glUniform3fv(c.uniform4.loc, 1, c.uniform4.v);
+				break;
+			case 4:
+				glUniform4fv(c.uniform4.loc, 1, c.uniform4.v);
+				break;
+			}
+			break;
+		case GLRRenderCommand::UNIFORMMATRIX:
+			glUniformMatrix4fv(c.uniformMatrix4.loc, 1, false, c.uniformMatrix4.m);
+			break;
+		case GLRRenderCommand::STENCIL:
+			glStencilFunc(c.stencil.stencilFunc, c.stencil.stencilRef, c.stencil.stencilCompareMask);
+			glStencilOp(c.stencil.stencilSFail, c.stencil.stencilZFail, c.stencil.stencilPass);
+			glStencilMask(c.stencil.stencilWriteMask);
+			break;
+		case GLRRenderCommand::BINDTEXTURE:
+		{
+			// NOTE(review): `slot` is passed to glActiveTexture unchanged, so it is
+			// assumed to already be a GL_TEXTUREn enum value - confirm against callers.
+			GLint target = c.texture.slot;
+			if (target != activeTexture) {
+				glActiveTexture(target);
+				activeTexture = target;
+			}
+			glBindTexture(GL_TEXTURE_2D, c.texture.texture);
+			break;
+		}
+		case GLRRenderCommand::DRAW:
+			glDrawArrays(c.draw.mode, c.draw.first, c.draw.count);
+			break;
+		case GLRRenderCommand::DRAW_INDEXED:
+			// Only non-instanced draws are issued; any other instance count is skipped.
+			if (c.drawIndexed.instances == 1) {
+				glDrawElements(c.drawIndexed.mode, c.drawIndexed.count, c.drawIndexed.indexType, c.drawIndexed.indices);
+			}
+			break;
+		}
+	}
+	// Leave texture unit 0 active for whatever runs next.
+	if (activeTexture != GL_TEXTURE0)
+		glActiveTexture(GL_TEXTURE0);
+}
+
+// Executes one COPY step: copies step.copy.srcRect of the source framebuffer's
+// color texture or depth renderbuffer to step.copy.dstPos in the destination
+// framebuffer, using whichever glCopyImageSubData variant the build/driver offers.
+// Steps whose aspectMask is neither GLR_ASPECT_COLOR nor GLR_ASPECT_DEPTH are skipped.
+void GLQueueRunner::PerformCopy(const GLRStep &step) {
+	GLuint srcTex = 0;
+	GLuint dstTex = 0;
+	GLuint target = GL_TEXTURE_2D;
+
+	const GLRect2D &srcRect = step.copy.srcRect;
+	const GLOffset2D &dstPos = step.copy.dstPos;
+
+	GLRFramebuffer *src = step.copy.src;
+	// The destination must come from copy.dst; using copy.src here turned every
+	// copy into a copy of the source onto itself.
+	GLRFramebuffer *dst = step.copy.dst;
+
+	int srcLevel = 0;
+	int dstLevel = 0;
+	int srcZ = 0;
+	int dstZ = 0;
+	int depth = 1;
+
+	switch (step.copy.aspectMask) {
+	case GLR_ASPECT_COLOR:
+		srcTex = src->color.texture;
+		dstTex = dst->color.texture;
+		break;
+	case GLR_ASPECT_DEPTH:
+		// Depth is held in a renderbuffer rather than a 2D texture.
+		target = GL_RENDERBUFFER;
+		srcTex = src->depth.texture;
+		dstTex = dst->depth.texture;
+		break;
+	default:
+		// No image was selected; issuing the copy with object name 0 would only raise a GL error.
+		return;
+	}
+#if defined(USING_GLES2)
+#ifndef IOS
+	glCopyImageSubDataOES(
+		srcTex, target, srcLevel, srcRect.x, srcRect.y, srcZ,
+		dstTex, target, dstLevel, dstPos.x, dstPos.y, dstZ,
+		srcRect.w, srcRect.h, depth);
+#endif
+#else
+	if (gl_extensions.ARB_copy_image) {
+		glCopyImageSubData(
+			srcTex, target, srcLevel, srcRect.x, srcRect.y, srcZ,
+			dstTex, target, dstLevel, dstPos.x, dstPos.y, dstZ,
+			srcRect.w, srcRect.h, depth);
+	} else if (gl_extensions.NV_copy_image) {
+		// Older, pre GL 4.x NVIDIA cards.
+		glCopyImageSubDataNV(
+			srcTex, target, srcLevel, srcRect.x, srcRect.y, srcZ,
+			dstTex, target, dstLevel, dstPos.x, dstPos.y, dstZ,
+			srcRect.w, srcRect.h, depth);
+	}
+#endif
+}
+
+// Stub: called at the start of every render pass, but does not bind anything yet.
+void GLQueueRunner::PerformBindFramebufferAsRenderTarget(const GLRStep &pass) {
+	
+}
+
+// Stub: readback copy/conversion into `pixels` is not implemented yet.
+void GLQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) {
+
+}
--- a/ext/native/thin3d/GLQueueRunner.h
+++ b/ext/native/thin3d/GLQueueRunner.h
@ -0,0 +1,260 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "gfx/gl_common.h"
+#include "thin3d/DataFormat.h"
+
+// Viewport rectangle plus depth range. The queue runner passes x/y/w/h to
+// glViewport (truncated to integers) and minZ/maxZ to glDepthRange.
+struct GLRViewport {
+	float x, y, w, h, minZ, maxZ;
+};
+
+// Integer rectangle (origin + size); used for scissor rects and copy/blit/readback regions.
+struct GLRect2D {
+	int x, y, w, h;
+};
+
+// Integer 2D position; used as the destination origin of a framebuffer copy.
+struct GLOffset2D {
+	int x, y;
+};
+
+// Tag selecting which member of the GLRRenderData union is valid.
+// TEXTURESAMPLER and PUSH_CONSTANTS are declared here but are not handled by
+// GLQueueRunner::PerformRenderPass.
+enum class GLRRenderCommand : uint8_t {
+	DEPTH,
+	STENCIL,
+	BLEND,
+	BLENDCOLOR,
+	UNIFORM4F,
+	UNIFORMMATRIX,
+	TEXTURESAMPLER,
+	VIEWPORT,
+	SCISSOR,
+	CLEAR,
+	BINDTEXTURE,
+	DRAW,
+	DRAW_INDEXED,
+	PUSH_CONSTANTS,
+};
+
+// One recorded render command: `cmd` says which union member below carries the
+// arguments. Instances are stored by value in GLRStep::commands.
+struct GLRRenderData {
+	GLRRenderCommand cmd;
+	union {
+		// DEPTH: depth test enable, depth write mask and compare function.
+		struct {
+			GLboolean enabled;
+			GLboolean write;
+			GLenum func;
+		} depth;
+		// STENCIL: arguments for glStencilFunc / glStencilOp / glStencilMask.
+		struct {
+			GLboolean enabled;
+			GLenum stencilOp;
+			GLenum stencilFunc;
+			uint8_t stencilWriteMask;
+			uint8_t stencilCompareMask;
+			uint8_t stencilRef;
+			GLenum stencilSFail;
+			GLenum stencilZFail;
+			GLenum stencilPass;
+		} stencil;
+		// DRAW: arguments for glDrawArrays.
+		struct {
+			GLenum mode;  // primitive
+			GLint buffer;
+			GLint first;
+			GLint count;
+		} draw;
+		// DRAW_INDEXED: arguments for glDrawElements.
+		struct {
+			GLenum mode;  // primitive
+			GLint count;
+			GLint instances;
+			GLint indexType;
+			void *indices;
+		} drawIndexed;
+		// UNIFORM4F: `count` (1-4) is the number of components of v[] to upload.
+		struct {
+			GLint loc;
+			GLint count;
+			float v[4];
+		} uniform4;
+		// UNIFORMMATRIX: a single 4x4 float matrix.
+		struct {
+			GLint loc;
+			float m[16];
+		} uniformMatrix4;
+		// CLEAR: clear values and the aspects they apply to.
+		struct {
+			uint32_t clearColor;
+			float clearZ;
+			int clearStencil;
+			int clearMask;   // Combination of GLR_ASPECT_* bits.
+		} clear;
+		// BINDTEXTURE: texture unit and texture name to bind as GL_TEXTURE_2D.
+		struct {
+			int slot;
+			GLint texture;
+		} texture;
+		// TEXTURESAMPLER: wrap modes and filter flags.
+		struct {
+			GLuint wrapU;
+			GLuint wrapV;
+			bool maxFilter;
+			bool minFilter;
+			bool mipFilter;
+		} textureSampler;
+		// VIEWPORT
+		struct {
+			GLRViewport vp;
+		} viewport;
+		// SCISSOR
+		struct {
+			GLRect2D rc;
+		} scissor;
+		// BLEND: enable flag, blend factors and blend equations.
+		struct {
+			GLboolean enabled;
+			GLenum srcColor;
+			GLenum dstColor;
+			GLenum srcAlpha;
+			GLenum dstAlpha;
+			GLenum funcColor;
+			GLenum funcAlpha;
+		} blend;
+		// BLENDCOLOR: constant blend color passed to glBlendColor.
+		struct {
+			float color[4];
+		} blendColor;
+	};
+};
+
+// Unlike in Vulkan, we can't create stuff on the main thread, but need to
+// defer this too. A big benefit will be that we'll be able to do all creation
+// at the start of the frame.
+// This enum tags a GLRInitStep with the kind of deferred creation/upload it describes.
+enum class GLRInitStepType : uint8_t {
+	CREATE_TEXTURE,
+	CREATE_SHADER,
+	CREATE_PROGRAM,
+	CREATE_BUFFER,
+
+	TEXTURE_SUBDATA,
+	BUFFER_SUBDATA,
+};
+
+class GLRShader;
+class GLRTexture;
+class GLRProgram;
+
+// One deferred initialization command. `stepType` says which union member is valid.
+// Only the CREATE_TEXTURE, CREATE_SHADER and CREATE_PROGRAM payloads exist so far.
+struct GLRInitStep {
+	GLRInitStep(GLRInitStepType _type) : stepType(_type) {}
+	GLRInitStepType stepType;
+	union {
+		// CREATE_TEXTURE: the wrapper object to fill in, plus the requested size.
+		struct {
+			GLRTexture *texture;
+			int width;
+			int height;
+			// ...
+		} create_texture;
+		// CREATE_SHADER: the wrapper object plus a pointer to the source text
+		// (the pointer is stored as-is, not copied).
+		struct {
+			GLRShader *shader;
+			const char *code;
+		} create_shader;
+		// CREATE_PROGRAM: the wrapper object plus the two shaders to combine.
+		struct {
+			GLRProgram *program;
+			GLRShader *vshader;
+			GLRShader *fshader;
+		} create_program;
+	};
+};
+
+// Tag for GLRStep; GLQueueRunner::RunSteps dispatches on this value.
+enum class GLRStepType : uint8_t {
+	RENDER,
+	COPY,
+	BLIT,
+	READBACK,
+	READBACK_IMAGE,
+};
+
+// Per-attachment action recorded on a render step (GLRStep::render.color / depthStencil).
+// A step with no commands whose two actions are both KEEP is treated as empty and skipped.
+enum class GLRRenderPassAction {
+	DONT_CARE,
+	CLEAR,
+	KEEP,
+};
+
+class GLRFramebuffer;
+
+// Framebuffer aspect flags. They are combined and tested as a bitmask
+// (e.g. `clearMask & GLR_ASPECT_STENCIL`), so every value must be a distinct bit.
+// STENCIL was previously 3, which aliased COLOR | DEPTH.
+enum {
+	GLR_ASPECT_COLOR = 1,
+	GLR_ASPECT_DEPTH = 2,
+	GLR_ASPECT_STENCIL = 4,
+};
+
+// One queued unit of work for GLQueueRunner. `stepType` says which union member is
+// valid; `commands` is only consumed for RENDER steps.
+struct GLRStep {
+	GLRStep(GLRStepType _type) : stepType(_type) {}
+	GLRStepType stepType;
+	std::vector<GLRRenderData> commands;
+	union {
+		// RENDER: target framebuffer, per-attachment actions and clear values.
+		struct {
+			GLRFramebuffer *framebuffer;
+			GLRRenderPassAction color;
+			GLRRenderPassAction depthStencil;
+			uint32_t clearColor;
+			float clearDepth;
+			int clearStencil;
+			int numDraws;  // Incremented by GLRenderManager::Draw / DrawIndexed.
+		} render;
+		// COPY: same-size copy of srcRect from src to dstPos in dst.
+		struct {
+			GLRFramebuffer *src;
+			GLRFramebuffer *dst;
+			GLRect2D srcRect;
+			GLOffset2D dstPos;
+			int aspectMask;
+		} copy;
+		// BLIT: copy between two rectangles, with an optional filter flag.
+		struct {
+			GLRFramebuffer *src;
+			GLRFramebuffer *dst;
+			GLRect2D srcRect;
+			GLRect2D dstRect;
+			int aspectMask;
+			GLboolean filter;
+		} blit;
+		// READBACK: region of a framebuffer to read back.
+		struct {
+			int aspectMask;
+			GLRFramebuffer *src;
+			GLRect2D srcRect;
+		} readback;
+		// READBACK_IMAGE: region and mip level of a texture to read back.
+		struct {
+			GLint texture;
+			GLRect2D srcRect;
+			int mipLevel;
+		} readback_image;
+	};
+};
+
+// Executes queued GLRInitStep / GLRStep lists by issuing the corresponding GL calls.
+class GLQueueRunner {
+public:
+	GLQueueRunner() {}
+
+	void RunInitSteps(const std::vector<GLRInitStep> &steps);
+
+	// Executes each step in order and deletes it afterwards.
+	void RunSteps(const std::vector<GLRStep *> &steps);
+	void LogSteps(const std::vector<GLRStep *> &steps);
+
+	void CreateDeviceObjects();
+	void DestroyDeviceObjects();
+
+	// Flattens a (color, depth) action pair into a single index in [0, 9).
+	inline int RPIndex(GLRRenderPassAction color, GLRRenderPassAction depth) {
+		return (int)depth * 3 + (int)color;
+	}
+
+	void CopyReadbackBuffer(int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels);
+
+private:
+	// One handler per GLRStepType, plus the framebuffer bind used by render passes.
+	void PerformBindFramebufferAsRenderTarget(const GLRStep &pass);
+	void PerformRenderPass(const GLRStep &pass);
+	void PerformCopy(const GLRStep &pass);
+	void PerformBlit(const GLRStep &pass);
+	void PerformReadback(const GLRStep &pass);
+	void PerformReadbackImage(const GLRStep &pass);
+
+	void LogRenderPass(const GLRStep &pass);
+	void LogCopy(const GLRStep &pass);
+	void LogBlit(const GLRStep &pass);
+	void LogReadback(const GLRStep &pass);
+	void LogReadbackImage(const GLRStep &pass);
+
+	void ResizeReadbackBuffer(size_t requiredSize);
+
+	GLint curFramebuffer_ = 0;
+
+	// Readback buffer. Currently we only support synchronous readback, so we only really need one.
+	// We size it generously.
+	GLint readbackBuffer_ = 0;
+	int readbackBufferSize_ = 0;
+};
--- a/ext/native/thin3d/GLRenderManager.cpp
+++ b/ext/native/thin3d/GLRenderManager.cpp
@ -0,0 +1,326 @@
+#include <cassert>
+
+#include "GLRenderManager.h"
+#include "thread/threadutil.h"
+#include "base/logging.h"
+
+#if 0 // def _DEBUG
+#define VLOG ILOG
+#else
+#define VLOG(...)
+#endif
+
+// Creates the GL storage backing `img`. Not implemented yet.
+// `format` is a GLint to match the declaration in GLRenderManager.h; with GLuint
+// this definition was a separate overload and the declared function had no body.
+void GLCreateImage(GLRImage &img, int width, int height, GLint format, bool color) {
+
+}
+
+// Deletes every shader and program queued on this deleter, then empties the
+// queues so that a second Perform() cannot delete the same objects again.
+void GLDeleter::Perform() {
+	for (auto shader : shaders) {
+		delete shader;
+	}
+	shaders.clear();
+	for (auto program : programs) {
+		delete program;
+	}
+	programs.clear();
+	// ..
+}
+
+// Constructor. The per-frame loop body is still empty, so nothing is set up here yet.
+GLRenderManager::GLRenderManager() {
+	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
+
+	}
+}
+
+// Destructor. The per-frame loop body is still empty, so nothing is torn down here yet.
+GLRenderManager::~GLRenderManager() {
+	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
+
+	}
+}
+
+// Main loop for the render thread. Waits until the current frame slot is marked
+// readyForRun, clears the flag and runs the slot; it only advances to the next
+// slot after a run of type END. Returns once run_ is false and the slot it is
+// waiting on has nothing ready.
+void GLRenderManager::ThreadFunc() {
+	setCurrentThreadName("RenderMan");
+	int threadFrame = threadInitFrame_;
+	bool nextFrame = false;
+	bool firstFrame = true;
+	while (true) {
+		{
+			if (nextFrame) {
+				threadFrame++;
+				if (threadFrame >= MAX_INFLIGHT_FRAMES)
+					threadFrame = 0;
+			}
+			FrameData &frameData = frameData_[threadFrame];
+			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
+			while (!frameData.readyForRun && run_) {
+				VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame);
+				frameData.pull_condVar.wait(lock);
+			}
+			if (!frameData.readyForRun && !run_) {
+				// This means we're out of frames to render and run_ is false, so bail.
+				break;
+			}
+			VLOG("PULL: frame[%d].readyForRun = false", threadFrame);
+			frameData.readyForRun = false;
+			// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
+			// but that created a race condition where frames could end up not finished properly on resize etc.
+
+			// Only increment next time if we're done.
+			nextFrame = frameData.type == GLRRunType::END;
+			assert(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC);
+		}
+		// The pull_mutex lock is released here, before the frame is actually run.
+		VLOG("PULL: Running frame %d", threadFrame);
+		if (firstFrame) {
+			ILOG("Running first frame (%d)", threadFrame);
+			firstFrame = false;
+		}
+		Run(threadFrame);
+		VLOG("PULL: Finished frame %d", threadFrame);
+	}
+
+	VLOG("PULL: Quitting");
+}
+
+// Asks the render thread to stop. Only acts when useThread_ and run_ are both set:
+// clears run_, wakes both condition variables of every frame slot, discards the
+// steps recorded for the current frame via Wipe(), and then, per slot, frees any
+// leftover steps and waits until readyForFence is set.
+void GLRenderManager::StopThread() {
+	// Since we don't control the thread directly, this will only pause the thread.
+
+
+	if (useThread_ && run_) {
+		run_ = false;
+		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
+			auto &frameData = frameData_[i];
+			{
+				std::unique_lock<std::mutex> lock(frameData.push_mutex);
+				frameData.push_condVar.notify_all();
+			}
+			{
+				std::unique_lock<std::mutex> lock(frameData.pull_mutex);
+				frameData.pull_condVar.notify_all();
+			}
+		}
+
+		// TODO: Wait for something here!
+
+		ILOG("GL submission thread paused. Frame=%d", curFrame_);
+
+		// Eat whatever has been queued up for this frame if anything.
+		Wipe();
+
+		// Wait for any fences to finish and be resignaled, so we don't have sync issues.
+		// Also clean out any queued data, which might refer to things that might not be valid
+		// when we restart...
+		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
+			auto &frameData = frameData_[i];
+			// A slot that is still pending or still holds steps at this point is treated as fatal.
+			if (frameData.readyForRun || frameData.steps.size() != 0) {
+				Crash();
+			}
+			frameData.readyForRun = false;
+			// Note: this inner index shadows the outer frame index `i`.
+			for (size_t i = 0; i < frameData.steps.size(); i++) {
+				delete frameData.steps[i];
+			}
+			frameData.steps.clear();
+
+			std::unique_lock<std::mutex> lock(frameData.push_mutex);
+			while (!frameData.readyForFence) {
+				VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
+				frameData.push_condVar.wait(lock);
+			}
+		}
+	} else {
+		ILOG("GL submission thread was already paused.");
+	}
+}
+
+// Starts a new render step targeting `fb` (nullptr is accepted; the recorded size is then 0x0).
+//
+// - If the most recent step is already a render step on the same framebuffer and
+//   neither action is CLEAR, the call is a no-op.
+// - If the current render step is empty (no commands, both actions KEEP), it is
+//   dropped before the new step is queued.
+//
+// color/depth are the per-attachment actions; clearColor/clearDepth/clearStencil
+// are stored on the step as given.
+void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, uint32_t clearColor, float clearDepth, uint8_t clearStencil) {
+	assert(insideFrame_);
+	// Eliminate dupes. The step type is checked first so that the `render` union
+	// member is only read when it is the active one.
+	if (!steps_.empty() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
+		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR) {
+			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
+			return;
+		}
+	}
+	if (curRenderStep_ && curRenderStep_->commands.size() == 0 && curRenderStep_->render.color == GLRRenderPassAction::KEEP && curRenderStep_->render.depthStencil == GLRRenderPassAction::KEEP) {
+		// Can trivially kill the last empty render step.
+		assert(steps_.back() == curRenderStep_);
+		delete steps_.back();
+		steps_.pop_back();
+		curRenderStep_ = nullptr;
+	}
+	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
+		VLOG("Empty render step. Usually happens after uploading pixels..");
+	}
+
+	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
+	// This is what queues up new passes, and can end previous ones.
+	step->render.framebuffer = fb;
+	step->render.color = color;
+	step->render.depthStencil = depth;
+	step->render.clearColor = clearColor;
+	step->render.clearDepth = clearDepth;
+	step->render.clearStencil = clearStencil;
+	step->render.numDraws = 0;
+	steps_.push_back(step);
+
+	curRenderStep_ = step;
+	curWidth_ = fb ? fb->width : 0; // vulkan_->GetBackbufferWidth();
+	curHeight_ = fb ? fb->height : 0; // vulkan_->GetBackbufferHeight();
+}
+
+// Returns the GL name of the framebuffer's color texture. `binding`, `aspectBit`
+// and `attachment` are currently unused, and `fb` must not be null.
+GLuint GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment) {
+	// Easy in GL.
+	const GLRImage &colorImage = fb->color;
+	return colorImage.texture;
+}
+
+// Stub: does not queue a COPY step yet.
+void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask) {
+
+}
+
+// Stub: does not queue a BLIT step yet.
+void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter) {
+
+}
+
+// Begins recording a frame on the current frame slot. When threaded, blocks until
+// the slot's readyForFence flag is set and then clears it. Afterwards runs the
+// slot's pending deleter and marks the manager as being inside a frame.
+void GLRenderManager::BeginFrame() {
+	VLOG("BeginFrame");
+
+	int curFrame = GetCurFrame();
+	FrameData &frameData = frameData_[curFrame];
+
+	// Make sure the very last command buffer from the frame before the previous has been fully executed.
+	if (useThread_) {
+		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		while (!frameData.readyForFence) {
+			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
+			frameData.push_condVar.wait(lock);
+		}
+		frameData.readyForFence = false;
+	}
+
+	VLOG("PUSH: Fencing %d", curFrame);
+
+	// vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX);
+	// vkResetFences(device, 1, &frameData.fence);
+
+	// Must be after the fence - this performs deletes.
+	VLOG("PUSH: BeginFrame %d", curFrame);
+	if (!run_) {
+		WLOG("BeginFrame while !run_!");
+	}
+
+	// vulkan_->BeginFrame();
+	frameData.deleter.Perform();
+
+	insideFrame_ = true;
+}
+
+// Ends recording of the current frame. The recorded steps and init steps are
+// handed to the current frame slot, which is either run inline (useThread_ false)
+// or flagged readyForRun for the render thread. Pending deletions move to the
+// slot's deleter, and curFrame_ advances to the next slot, wrapping at
+// MAX_INFLIGHT_FRAMES.
+void GLRenderManager::Finish() {
+	curRenderStep_ = nullptr;
+	int curFrame = GetCurFrame();
+	FrameData &frameData = frameData_[curFrame];
+	if (!useThread_) {
+		frameData.steps = std::move(steps_);
+		// Run() consumes frameData.initSteps, so the recorded init steps must be handed over too.
+		frameData.initSteps = std::move(initSteps_);
+		frameData.type = GLRRunType::END;
+		Run(curFrame);
+	} else {
+		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
+		VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
+		frameData.steps = std::move(steps_);
+		frameData.initSteps = std::move(initSteps_);
+		frameData.readyForRun = true;
+		frameData.type = GLRRunType::END;
+		frameData.pull_condVar.notify_all();
+	}
+	// Moved-from vectors are in an unspecified state; reset them before they are reused.
+	steps_.clear();
+	initSteps_.clear();
+
+	// vulkan_->EndFrame();
+	frameData.deleter.Take(deleter_);
+
+	// Advance round-robin. Without the wrap, frameData_ is indexed out of bounds
+	// once MAX_INFLIGHT_FRAMES frames have been finished.
+	curFrame_++;
+	if (curFrame_ >= MAX_INFLIGHT_FRAMES)
+		curFrame_ = 0;
+
+	insideFrame_ = false;
+}
+
+// Marks the frame slot as begun; does nothing if it already was.
+void GLRenderManager::BeginSubmitFrame(int frame) {
+	FrameData &slot = frameData_[frame];
+	if (slot.hasBegun)
+		return;
+	slot.hasBegun = true;
+}
+
+// When threaded and `triggerFence` is true, sets the slot's readyForFence flag and
+// wakes any thread waiting on push_condVar. Otherwise does nothing.
+void GLRenderManager::Submit(int frame, bool triggerFence) {
+	FrameData &frameData = frameData_[frame];
+
+	// In GL, submission happens automatically in Run().
+
+	// When !triggerFence, we notify after syncing with Vulkan.
+	if (useThread_ && triggerFence) {
+		VLOG("PULL: Frame %d.readyForFence = true", frame);
+		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		frameData.readyForFence = true;
+		frameData.push_condVar.notify_all();
+	}
+}
+
+// Ends an END-type run: clears hasBegun, submits with the fence triggered, and
+// resets skipSwap if it was set (the actual buffer swap is not implemented yet).
+void GLRenderManager::EndSubmitFrame(int frame) {
+	FrameData &slot = frameData_[frame];
+	slot.hasBegun = false;
+
+	Submit(frame, true);
+
+	if (slot.skipSwap) {
+		// The skip request is consumed here, so it applies to one frame only.
+		slot.skipSwap = false;
+	} else {
+		// glSwapBuffers();
+	}
+}
+
+// Executes everything queued on a frame slot: init steps first, then the render
+// steps. Both queues are emptied, and the frame is finished according to its run type.
+void GLRenderManager::Run(int frame) {
+	BeginSubmitFrame(frame);
+
+	FrameData &slot = frameData_[frame];
+	// queueRunner_.LogSteps(slot.steps);
+	queueRunner_.RunInitSteps(slot.initSteps);
+	queueRunner_.RunSteps(slot.steps);
+	// RunSteps has deleted the step objects; drop the now-dangling pointers.
+	slot.steps.clear();
+	slot.initSteps.clear();
+
+	if (slot.type == GLRRunType::END) {
+		EndSubmitFrame(frame);
+	} else if (slot.type == GLRRunType::SYNC) {
+		EndSyncFrame(frame);
+	} else {
+		assert(false);
+	}
+
+	VLOG("PULL: Finished running frame %d", frame);
+}
+
+// Ends a SYNC-type run: submits without triggering the fence, blocks in glFinish(),
+// and then (when threaded) sets readyForFence and wakes waiters on push_condVar.
+void GLRenderManager::EndSyncFrame(int frame) {
+	FrameData &frameData = frameData_[frame];
+	Submit(frame, false);
+
+	// This is brutal! Should probably wait for a fence instead, not that it'll matter much since we'll
+	// still stall everything.
+	glFinish();
+	// vkDeviceWaitIdle(vulkan_->GetDevice());
+
+	// At this point we can resume filling the command buffers for the current frame since
+	// we know the device is idle - and thus all previously enqueued command buffers have been processed.
+	// No need to switch to the next frame number.
+
+	if (useThread_) {
+		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		frameData.readyForFence = true;
+		frameData.push_condVar.notify_all();
+	}
+}
+
+// Frees and discards every step recorded so far for the frame being built.
+void GLRenderManager::Wipe() {
+	for (size_t idx = 0; idx < steps_.size(); ++idx) {
+		delete steps_[idx];
+	}
+	steps_.clear();
+}
--- a/ext/native/thin3d/GLRenderManager.h
+++ b/ext/native/thin3d/GLRenderManager.h
@ -0,0 +1,297 @@
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+
+#include "gfx/gl_common.h"
+#include "math/dataconv.h"
+#include "Common/Log.h"
+#include "GLQueueRunner.h"
+
+// A GL object name together with its format. GLRFramebuffer uses one for its color
+// texture and one for its depth attachment (released there with glDeleteRenderbuffers).
+struct GLRImage {
+	GLuint texture;
+	GLuint format;
+};
+
+void GLCreateImage(GLRImage &img, int width, int height, GLint format, bool color);
+
+// Render target description: a color image, a depth image and their shared size.
+// The constructor only records the size; no GL objects are created yet.
+class GLRFramebuffer {
+public:
+	GLRFramebuffer(int _width, int _height) : width(_width), height(_height) {
+		/*
+		CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true);
+		CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false);
+
+		VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
+		VkImageView views[2]{};
+
+		fbci.renderPass = renderPass;
+		fbci.attachmentCount = 2;
+		fbci.pAttachments = views;
+		views[0] = color.imageView;
+		views[1] = depth.imageView;
+		fbci.width = width;
+		fbci.height = height;
+		fbci.layers = 1;
+
+		vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &framebuf);*/
+	}
+
+	// Releases the color texture and the depth renderbuffer. `framebuf` is not released here.
+	~GLRFramebuffer() {
+		glDeleteTextures(1, &color.texture);
+		glDeleteRenderbuffers(1, &depth.texture);
+	}
+
+	int numShadows = 1;  // TODO: Support this.
+
+	GLuint framebuf = 0;
+	GLRImage color{};
+	GLRImage depth{};
+	int width = 0;
+	int height = 0;
+};
+
+// We need to create some custom heap-allocated types so we can forward things that need to be created on the GL thread, before
+// they've actually been created.
+
+// Heap-allocated handle for a GL shader object. `shader` stays 0 until the object
+// exists; the destructor deletes it if it is non-zero.
+class GLRShader {
+public:
+	~GLRShader() {
+		if (shader) {
+			glDeleteShader(shader);
+		}
+	}
+	GLuint shader = 0;
+};
+
+// Heap-allocated handle for a GL program object. `program` stays 0 until the object
+// exists; the destructor deletes it if it is non-zero.
+class GLRProgram {
+public:
+	~GLRProgram() {
+		if (program) {
+			glDeleteProgram(program);
+		}
+	}
+	GLuint program = 0;
+};
+
+// Heap-allocated handle for a GL texture object. `texture` stays 0 until the
+// object exists; the destructor deletes it if it is non-zero.
+class GLRTexture {
+public:
+	~GLRTexture() {
+		if (texture) {
+			glDeleteTextures(1, &texture);
+		}
+	}
+	// Initialized to 0 (as in GLRShader / GLRProgram) so the destructor never reads
+	// an indeterminate value.
+	GLuint texture = 0;
+};
+
+// Heap-allocated handle for a GL object owned by a buffer wrapper. The destructor
+// deletes it if it is non-zero.
+// NOTE(review): the member is named `texture` and is released with glDeleteTextures,
+// which looks copied from GLRTexture; confirm whether this should hold a buffer name
+// released with glDeleteBuffers.
+class GLRBuffer {
+public:
+	~GLRBuffer() {
+		if (texture) {
+			glDeleteTextures(1, &texture);
+		}
+	}
+	// Initialized to 0 so the destructor never reads an indeterminate value.
+	GLuint texture = 0;
+};
+
+// How GLRenderManager::Run finishes a frame slot: END calls EndSubmitFrame,
+// SYNC calls EndSyncFrame.
+enum class GLRRunType {
+	END,
+	SYNC,
+};
+
+// Holds GL wrapper objects whose deletion is deferred until Perform() is called.
+class GLDeleter {
+public:
+	// Deletes everything currently queued.
+	void Perform();
+
+	// Takes over the queues of `other`, replacing this deleter's own queues.
+	// `other` is left empty.
+	void Take(GLDeleter &other) {
+		shaders = std::move(other.shaders);
+		programs = std::move(other.programs);
+		// A moved-from vector is only guaranteed to be valid, not empty; clear
+		// explicitly so `other` can never hand the same pointers over twice.
+		other.shaders.clear();
+		other.programs.clear();
+	}
+
+	std::vector<GLRShader *> shaders;
+	std::vector<GLRProgram *> programs;
+};
+
+
+// Records rendering work as queues of GLRInitStep / GLRStep and hands them to
+// GLQueueRunner in Run(), either inline from Finish() (useThread_ false) or from
+// ThreadFunc(). Frame slots are used round-robin, MAX_INFLIGHT_FRAMES of them.
+class GLRenderManager {
+public:
+	GLRenderManager();
+	~GLRenderManager();
+
+	// Entry point for the render thread; loops running frame slots as they become ready.
+	void ThreadFunc();
+
+	// Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again.
+	void BeginFrame();
+	// Can run on a different thread!
+	void Finish();
+	void Run(int frame);
+
+	// Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown.
+	void Wipe();
+
+	// Creation commands. These were not needed in Vulkan since there we can do that on the main thread.
+	// Each one allocates the wrapper object immediately, queues an init step that
+	// describes the creation, and returns the wrapper.
+	GLRTexture *CreateTexture(int w, int h) {
+		GLRInitStep step{ GLRInitStepType::CREATE_TEXTURE };
+		step.create_texture.texture = new GLRTexture();
+		step.create_texture.width = w;
+		step.create_texture.height = h;
+		initSteps_.push_back(step);
+		return step.create_texture.texture;
+	}
+
+	// `code` is stored as a pointer, not copied.
+	GLRShader *CreateShader(const char *code) {
+		GLRInitStep step{ GLRInitStepType::CREATE_SHADER };
+		step.create_shader.shader = new GLRShader();
+		step.create_shader.code = code;
+		initSteps_.push_back(step);
+		return step.create_shader.shader;
+	}
+
+	GLRProgram *CreateProgram(GLRShader *vshader, GLRShader *fshader) {
+		GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
+		step.create_program.program = new GLRProgram();
+		step.create_program.vshader = vshader;
+		step.create_program.fshader = fshader;
+		initSteps_.push_back(step);
+		return step.create_program.program;
+	}
+
+	void BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, uint32_t clearColor, float clearDepth, uint8_t clearStencil);
+	GLuint BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment);
+	bool CopyFramebufferToMemorySync(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride);
+	void CopyImageToMemorySync(GLuint texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride);
+
+	void CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask);
+	void BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter);
+
+	// The recording functions below append one command to the current render step.
+	// They require that a render step is active (asserted in debug builds).
+
+	void SetViewport(const GLRViewport &vp) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::VIEWPORT };
+		data.viewport.vp = vp;
+		curRenderStep_->commands.push_back(data);
+	}
+
+	void SetScissor(const GLRect2D &rc) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::SCISSOR };
+		data.scissor.rc = rc;
+		curRenderStep_->commands.push_back(data);
+	}
+
+	// Records the stencil enable flag, masks and reference value. The stencil
+	// function and operations are not parameters here and are left as initialized.
+	void SetStencil(bool enabled, uint8_t writeMask, uint8_t compareMask, uint8_t refValue) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::STENCIL };
+		// Previously the `enabled` argument was silently dropped.
+		data.stencil.enabled = enabled;
+		data.stencil.stencilWriteMask = writeMask;
+		data.stencil.stencilCompareMask = compareMask;
+		data.stencil.stencilRef = refValue;
+		curRenderStep_->commands.push_back(data);
+	}
+
+	void SetBlendFactor(float color[4]) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::BLENDCOLOR };
+		CopyFloat4(data.blendColor.color, color);
+		curRenderStep_->commands.push_back(data);
+	}
+
+	// `clearMask` is a combination of GLR_ASPECT_* bits.
+	void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::CLEAR };
+		data.clear.clearMask = clearMask;
+		data.clear.clearColor = clearColor;
+		data.clear.clearZ = clearZ;
+		data.clear.clearStencil = clearStencil;
+		curRenderStep_->commands.push_back(data);
+	}
+
+	void Draw(GLenum mode, int first, int count) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::DRAW };
+		data.draw.mode = mode;
+		data.draw.first = first;
+		data.draw.count = count;
+		data.draw.buffer = 0;
+		curRenderStep_->commands.push_back(data);
+		curRenderStep_->render.numDraws++;
+	}
+
+	// Always records a single instance.
+	void DrawIndexed(GLenum mode, int count, GLenum indexType, void *indices) {
+		_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
+		GLRRenderData data{ GLRRenderCommand::DRAW_INDEXED };
+		data.drawIndexed.mode = mode;
+		data.drawIndexed.count = count;
+		data.drawIndexed.indexType = indexType;
+		data.drawIndexed.instances = 1;
+		data.drawIndexed.indices = indices;
+		curRenderStep_->commands.push_back(data);
+		curRenderStep_->render.numDraws++;
+	}
+
+	enum { MAX_INFLIGHT_FRAMES = 3 };
+
+private:
+	void BeginSubmitFrame(int frame);
+	void EndSubmitFrame(int frame);
+	void Submit(int frame, bool triggerFence);
+
+	// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
+	void FlushSync();
+	void EndSyncFrame(int frame);
+
+	void StopThread();
+
+	int GetCurFrame() const {
+		return curFrame_;
+	}
+
+	// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
+	struct FrameData {
+		// push_*: guards/signals readyForFence.
+		std::mutex push_mutex;
+		std::condition_variable push_condVar;
+
+		// pull_*: guards/signals readyForRun.
+		std::mutex pull_mutex;
+		std::condition_variable pull_condVar;
+
+		bool readyForFence = true;
+		bool readyForRun = false;
+		bool skipSwap = false;
+		GLRRunType type = GLRRunType::END;
+
+		// GLuint fence; For future AZDO stuff?
+		std::vector<GLRStep *> steps;
+		std::vector<GLRInitStep> initSteps;
+
+		// Swapchain.
+		bool hasBegun = false;
+		uint32_t curSwapchainImage = -1;
+
+		GLDeleter deleter;
+	};
+
+	FrameData frameData_[MAX_INFLIGHT_FRAMES];
+
+	// Submission time state
+	// Size of the current render target; initialized so it is never read indeterminate.
+	int curWidth_ = 0;
+	int curHeight_ = 0;
+	bool insideFrame_ = false;
+	GLRStep *curRenderStep_ = nullptr;
+	std::vector<GLRStep *> steps_;
+	std::vector<GLRInitStep> initSteps_;
+
+	// Execution time state
+	bool run_ = true;
+	// Thread is managed elsewhere, and should call ThreadFunc.
+	std::mutex mutex_;
+	int threadInitFrame_ = 0;
+	GLQueueRunner queueRunner_;
+
+	GLDeleter deleter_;
+
+	bool useThread_ = false;
+
+	int curFrame_ = 0;
+};
+
--- a/ext/native/thin3d/thin3d_gl.cpp
+++ b/ext/native/thin3d/thin3d_gl.cpp
@ -14,6 +14,8 @@
 #include "gfx/GLStateCache.h"
 #include "gfx_es2/gpu_features.h"

+#include "thin3d/GLRenderManager.h"
+
 #ifdef IOS
 extern void bindDefaultFBO();
 #endif
@ -546,6 +548,8 @@ private:
 	void fbo_unbind();
 	void ApplySamplers();

+	GLRenderManager renderManager_;
+
 	std::vector<OpenGLSamplerState *> boundSamplers_;
 	OpenGLTexture *boundTextures_[8]{};
 	int maxTextures_ = 0;