#include "ppsspp_config.h" #include "GLRenderManager.h" #include "Common/GPU/OpenGL/GLFeatures.h" #include "Common/GPU/thin3d.h" #include "Common/Thread/ThreadUtil.h" #include "Common/VR/PPSSPPVR.h" #include "Common/Log.h" #include "Common/TimeUtil.h" #include "Common/MemoryUtil.h" #include "Common/StringUtils.h" #include "Common/Math/math_util.h" #if 0 // def _DEBUG #define VLOG(...) INFO_LOG(G3D, __VA_ARGS__) #else #define VLOG(...) #endif std::thread::id renderThreadId; GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) { if (caps.textureNPOTFullySupported) { canWrap = true; } else { canWrap = isPowerOf2(width) && isPowerOf2(height); } w = width; h = height; d = depth; this->numMips = numMips; } GLRTexture::~GLRTexture() { if (texture) { glDeleteTextures(1, &texture); } } GLRenderManager::GLRenderManager() { // size_t sz = sizeof(GLRRenderData); // _dbg_assert_(sz == 88); } GLRenderManager::~GLRenderManager() { _dbg_assert_(!run_); for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { _assert_(frameData_[i].deleter.IsEmpty()); _assert_(frameData_[i].deleter_prev.IsEmpty()); } // Was anything deleted during shutdown? deleter_.Perform(this, skipGLCalls_); _assert_(deleter_.IsEmpty()); } void GLRenderManager::ThreadStart(Draw::DrawContext *draw) { queueRunner_.CreateDeviceObjects(); renderThreadId = std::this_thread::get_id(); if (newInflightFrames_ != -1) { INFO_LOG(G3D, "Updating inflight frames to %d", newInflightFrames_); inflightFrames_ = newInflightFrames_; newInflightFrames_ = -1; } // Don't save draw, we don't want any thread safety confusion. bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW); bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage; if (!gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage) { // Force disable if it wouldn't work anyway. mapBuffers = false; } // Notes on buffer mapping: // NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best. // PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower. if (mapBuffers) { switch (gl_extensions.gpuVendor) { case GPU_VENDOR_NVIDIA: bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP; break; // Temporarily disabled because it doesn't work with task switching on Android. // The mapped buffer seems to just be pulled out like a rug from under us, crashing // as soon as any write happens, which can happen during shutdown since we write from the // Emu thread which may not yet have shut down. There may be solutions to this, but for now, // disable this strategy to avoid crashing. //case GPU_VENDOR_QUALCOMM: // bufferStrategy_ = GLBufferStrategy::FLUSH_INVALIDATE_UNMAP; // break; default: bufferStrategy_ = GLBufferStrategy::SUBDATA; } } else { bufferStrategy_ = GLBufferStrategy::SUBDATA; } } void GLRenderManager::ThreadEnd() { INFO_LOG(G3D, "ThreadEnd"); queueRunner_.DestroyDeviceObjects(); VLOG(" PULL: Quitting"); // Good time to run all the deleters to get rid of leftover objects. for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { // Since we're in shutdown, we should skip the GL calls on Android. frameData_[i].deleter.Perform(this, skipGLCalls_); frameData_[i].deleter_prev.Perform(this, skipGLCalls_); } deleter_.Perform(this, skipGLCalls_); for (int i = 0; i < (int)steps_.size(); i++) { delete steps_[i]; } steps_.clear(); initSteps_.clear(); } // Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame. // // This means that we have to block and run the render queue until we've presented one frame, // at which point we can leave. // // NOTE: If run_ is true, we WILL run a task! bool GLRenderManager::ThreadFrame() { if (!run_) { return false; } GLRRenderThreadTask *task = nullptr; // In case of syncs or other partial completion, we keep going until we complete a frame. while (true) { // Pop a task of the queue and execute it. // NOTE: We need to actually wait for a task, we can't just bail! { std::unique_lock lock(pushMutex_); while (renderThreadQueue_.empty()) { pushCondVar_.wait(lock); } task = std::move(renderThreadQueue_.front()); renderThreadQueue_.pop(); } // We got a task! We can now have pushMutex_ unlocked, allowing the host to // push more work when it feels like it, and just start working. if (task->runType == GLRRunType::EXIT) { // Oh, host wanted out. Let's leave, and also let's notify the host. // This is unlike Vulkan too which can just block on the thread existing. std::unique_lock lock(syncMutex_); syncCondVar_.notify_one(); syncDone_ = true; break; } // Render the scene. VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d()); if (Run(*task)) { // Swap requested, so we just bail the loop. delete task; break; } delete task; }; return true; } void GLRenderManager::StopThread() { // There's not really a lot to do here anymore. INFO_LOG(G3D, "GLRenderManager::StopThread()"); if (run_) { run_ = false; std::unique_lock lock(pushMutex_); renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT)); pushCondVar_.notify_one(); } else { WARN_LOG(G3D, "GL submission thread was already paused."); } } std::string GLRenderManager::GetGpuProfileString() const { int curFrame = GetCurFrame(); const GLQueueProfileContext &profile = frameData_[curFrame].profile; float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime); return StringFromFormat("CPU time to run the list: %0.2f ms", cputime_ms); } void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { _assert_(insideFrame_); #ifdef _DEBUG curProgram_ = nullptr; #endif // Eliminate dupes. if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) { if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) { // We don't move to a new step, this bind was unnecessary and we can safely skip it. curRenderStep_ = steps_.back(); return; } } if (curRenderStep_ && curRenderStep_->commands.size() == 0) { VLOG("Empty render step. Usually happens after uploading pixels."); } GLRStep *step = new GLRStep{ GLRStepType::RENDER }; // This is what queues up new passes, and can end previous ones. step->render.framebuffer = fb; step->render.color = color; step->render.depth = depth; step->render.stencil = stencil; step->tag = tag; steps_.push_back(step); GLuint clearMask = 0; GLRRenderData data(GLRRenderCommand::CLEAR); if (color == GLRRenderPassAction::CLEAR) { clearMask |= GL_COLOR_BUFFER_BIT; data.clear.clearColor = clearColor; } if (depth == GLRRenderPassAction::CLEAR) { clearMask |= GL_DEPTH_BUFFER_BIT; data.clear.clearZ = clearDepth; } if (stencil == GLRRenderPassAction::CLEAR) { clearMask |= GL_STENCIL_BUFFER_BIT; data.clear.clearStencil = clearStencil; } if (clearMask) { data.clear.scissorX = 0; data.clear.scissorY = 0; data.clear.scissorW = 0; data.clear.scissorH = 0; data.clear.clearMask = clearMask; data.clear.colorMask = 0xF; step->commands.push_back(data); } curRenderStep_ = step; if (fb) { if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) { step->dependencies.insert(fb); } } if (invalidationCallback_) { invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE); } } void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); _dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS); GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE }; data.bind_fb_texture.slot = binding; data.bind_fb_texture.framebuffer = fb; data.bind_fb_texture.aspect = aspectBit; curRenderStep_->commands.push_back(data); curRenderStep_->dependencies.insert(fb); } void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) { GLRStep *step = new GLRStep{ GLRStepType::COPY }; step->copy.srcRect = srcRect; step->copy.dstPos = dstPos; step->copy.src = src; step->copy.dst = dst; step->copy.aspectMask = aspectMask; step->dependencies.insert(src); step->tag = tag; bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height; if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst) step->dependencies.insert(dst); steps_.push_back(step); } void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) { GLRStep *step = new GLRStep{ GLRStepType::BLIT }; step->blit.srcRect = srcRect; step->blit.dstRect = dstRect; step->blit.src = src; step->blit.dst = dst; step->blit.aspectMask = aspectMask; step->blit.filter = filter; step->dependencies.insert(src); step->tag = tag; bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height; if (!fillsDst) step->dependencies.insert(dst); steps_.push_back(step); } bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) { _assert_(pixels); GLRStep *step = new GLRStep{ GLRStepType::READBACK }; step->readback.src = src; step->readback.srcRect = { x, y, w, h }; step->readback.aspectMask = aspectBits; step->readback.dstFormat = destFormat; step->dependencies.insert(src); step->tag = tag; steps_.push_back(step); curRenderStep_ = nullptr; FlushSync(); Draw::DataFormat srcFormat; if (aspectBits & GL_COLOR_BUFFER_BIT) { srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; } else if (aspectBits & GL_STENCIL_BUFFER_BIT) { // Copies from stencil are always S8. srcFormat = Draw::DataFormat::S8; } else if (aspectBits & GL_DEPTH_BUFFER_BIT) { // TODO: Do this properly. srcFormat = Draw::DataFormat::D24_S8; } else { return false; } queueRunner_.CopyFromReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels); return true; } void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) { _assert_(texture); _assert_(pixels); GLRStep *step = new GLRStep{ GLRStepType::READBACK_IMAGE }; step->readback_image.texture = texture; step->readback_image.mipLevel = mipLevel; step->readback_image.srcRect = { x, y, w, h }; step->tag = tag; steps_.push_back(step); curRenderStep_ = nullptr; FlushSync(); queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels); } void GLRenderManager::BeginFrame(bool enableProfiling) { #ifdef _DEBUG curProgram_ = nullptr; #endif int curFrame = GetCurFrame(); GLFrameData &frameData = frameData_[curFrame]; frameData.profile.enabled = enableProfiling; { VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d()); std::unique_lock lock(frameData.fenceMutex); while (!frameData.readyForFence) { frameData.fenceCondVar.wait(lock); } frameData.readyForFence = false; } if (!run_) { WARN_LOG(G3D, "BeginFrame while !run_!"); } insideFrame_ = true; } void GLRenderManager::Finish() { curRenderStep_ = nullptr; // EndCurRenderStep is this simple here. int curFrame = GetCurFrame(); GLFrameData &frameData = frameData_[curFrame]; frameData_[curFrame].deleter.Take(deleter_); VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame); GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::PRESENT); task->frame = curFrame; { std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); renderThreadQueue_.back()->initSteps = std::move(initSteps_); renderThreadQueue_.back()->steps = std::move(steps_); initSteps_.clear(); steps_.clear(); pushCondVar_.notify_one(); } curFrame_++; if (curFrame_ >= inflightFrames_) curFrame_ = 0; insideFrame_ = false; } // Render thread. Returns true if the caller should handle a swap. bool GLRenderManager::Run(GLRRenderThreadTask &task) { GLFrameData &frameData = frameData_[task.frame]; if (!frameData.hasBegun) { frameData.hasBegun = true; frameData.deleter_prev.Perform(this, skipGLCalls_); frameData.deleter_prev.Take(frameData.deleter); } // queueRunner_.LogSteps(stepsOnThread); queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_); // Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created. if (!skipGLCalls_) { for (auto iter : frameData.activePushBuffers) { iter->Flush(); iter->UnmapDevice(); } } if (frameData.profile.enabled) { frameData.profile.cpuStartTime = time_now_d(); } if (IsVREnabled()) { int passes = GetVRPassesCount(); for (int i = 0; i < passes; i++) { PreVRFrameRender(i); queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true); PostVRFrameRender(); } } else { queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false); } if (frameData.profile.enabled) { frameData.profile.cpuEndTime = time_now_d(); } if (!skipGLCalls_) { for (auto iter : frameData.activePushBuffers) { iter->MapDevice(bufferStrategy_); } } bool swapRequest = false; switch (task.runType) { case GLRRunType::PRESENT: if (!frameData.skipSwap) { if (swapIntervalChanged_) { swapIntervalChanged_ = false; if (swapIntervalFunction_) { swapIntervalFunction_(swapInterval_); } } // This is the swapchain framebuffer flip. if (swapFunction_) { VLOG(" PULL: SwapFunction()"); swapFunction_(); if (!retainControl_) { // get out of here. swapRequest = true; } } else { VLOG(" PULL: SwapRequested"); swapRequest = true; } } else { frameData.skipSwap = false; } frameData.hasBegun = false; VLOG(" PULL: Frame %d.readyForFence = true", task.frame); { std::lock_guard lock(frameData.fenceMutex); frameData.readyForFence = true; frameData.fenceCondVar.notify_one(); // At this point, we're done with this framedata (for now). } break; case GLRRunType::SYNC: frameData.hasBegun = false; // glFinish is not actually necessary here, and won't be unless we start using // glBufferStorage. Then we need to use fences. { std::unique_lock lock(syncMutex_); syncDone_ = true; syncCondVar_.notify_one(); } break; default: _assert_(false); } VLOG(" PULL: ::Run(): Done running tasks"); return swapRequest; } void GLRenderManager::FlushSync() { { VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_); GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC); task->frame = curFrame_; std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); renderThreadQueue_.back()->initSteps = std::move(initSteps_); renderThreadQueue_.back()->steps = std::move(steps_); pushCondVar_.notify_one(); steps_.clear(); } { std::unique_lock lock(syncMutex_); // Wait for the flush to be hit, since we're syncing. while (!syncDone_) { VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_); syncCondVar_.wait(lock); } syncDone_ = false; } }