From 77f2b0c9e9f682d68c0cc2e2a8db8f81dbc58f63 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 Nov 2015 13:44:21 +0100 Subject: [PATCH 1/7] Extract the viewport/scissor conversion into a function. --- GPU/GLES/StateMapping.cpp | 117 ++++++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 48 deletions(-) diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 6513984610..920eb81d85 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -585,8 +585,24 @@ void TransformDrawEngine::ApplyBlendState() { } } -void TransformDrawEngine::ApplyDrawState(int prim) { +// Common representation, should be able to set this directly with any modern API. +struct ViewportAndScissor { + bool scissorEnable; + int scissorX; + int scissorY; + int scissorW; + int scissorH; + float viewportX; + float viewportY; + float viewportW; + float viewportH; + float depthRangeMin; + float depthRangeMax; + bool dirtyProj; +}; +void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out); +void TransformDrawEngine::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { @@ -746,19 +762,45 @@ void TransformDrawEngine::ApplyDrawState(int prim) { } } + ViewportAndScissor vpAndScissor; + ConvertViewportAndScissor(useBufferedRendering, + framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(), + framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(), + vpAndScissor); + + if (vpAndScissor.scissorEnable) { + glstate.scissorTest.enable(); + if (!useBufferedRendering) { + vpAndScissor.scissorY = PSP_CoreParameter().pixelHeight - vpAndScissor.scissorH - vpAndScissor.scissorY; + } + glstate.scissorRect.set(vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorW, vpAndScissor.scissorH); + } else { + glstate.scissorTest.disable(); + } + + if (!useBufferedRendering) { + vpAndScissor.viewportY = PSP_CoreParameter().pixelHeight - vpAndScissor.viewportH - vpAndScissor.viewportY; + } + glstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH); + glstate.depthRange.set(vpAndScissor.depthRangeMin, vpAndScissor.depthRangeMax); + + if (vpAndScissor.dirtyProj) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + } +} + +void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) { bool throughmode = gstate.isModeThrough(); + out.dirtyProj = false; float renderWidthFactor, renderHeightFactor; - float renderWidth, renderHeight; float renderX = 0.0f, renderY = 0.0f; float displayOffsetX, displayOffsetY; if (useBufferedRendering) { displayOffsetX = 0.0f; displayOffsetY = 0.0f; - renderWidth = framebufferManager_->GetRenderWidth(); - renderHeight = framebufferManager_->GetRenderHeight(); - renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetBufferWidth(); - renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetBufferHeight(); + renderWidthFactor = (float)renderWidth / (float)bufferWidth; + renderHeightFactor = (float)renderHeight / (float)bufferHeight; } else { float pixelW = PSP_CoreParameter().pixelWidth; float pixelH = PSP_CoreParameter().pixelHeight; @@ -778,26 +820,15 @@ void TransformDrawEngine::ApplyDrawState(int prim) { // This is a bit of a hack as the render buffer isn't always that size // We always scissor on non-buffered so that clears don't spill outside the frame. if (useBufferedRendering && scissorX1 == 0 && scissorY1 == 0 - && scissorX2 >= (int) gstate_c.curRTWidth - && scissorY2 >= (int) gstate_c.curRTHeight) { - glstate.scissorTest.disable(); + && scissorX2 >= (int)gstate_c.curRTWidth + && scissorY2 >= (int)gstate_c.curRTHeight) { + out.scissorEnable = false; } else { - glstate.scissorTest.enable(); - - // Buffers are now in the GL coordinate system, so no flipping needed. - if (useBufferedRendering) { - glstate.scissorRect.set( - renderX + displayOffsetX + scissorX1 * renderWidthFactor, - renderY + displayOffsetY + scissorY1 * renderHeightFactor, - (scissorX2 - scissorX1) * renderWidthFactor, - (scissorY2 - scissorY1) * renderHeightFactor); - } else { - glstate.scissorRect.set( - renderX + displayOffsetX + scissorX1 * renderWidthFactor, - renderY + displayOffsetY + renderHeight - (scissorY2 * renderHeightFactor), - (scissorX2 - scissorX1) * renderWidthFactor, - (scissorY2 - scissorY1) * renderHeightFactor); - } + out.scissorEnable = true; + out.scissorX = renderX + displayOffsetX + scissorX1 * renderWidthFactor; + out.scissorY = renderY + displayOffsetY + scissorY1 * renderHeightFactor; + out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor; + out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor; } int curRTWidth = gstate_c.curRTWidth; @@ -808,22 +839,12 @@ void TransformDrawEngine::ApplyDrawState(int prim) { if (throughmode) { // No viewport transform here. Let's experiment with using region. - if (useBufferedRendering) { - // No flip needed - glstate.viewport.set( - renderX + displayOffsetX, - renderY + displayOffsetY, - curRTWidth * renderWidthFactor, - curRTHeight * renderHeightFactor); - } else { - renderY += renderHeight - framebufferManager_->GetTargetHeight() * renderHeightFactor; - glstate.viewport.set( - renderX + displayOffsetX, - renderY + displayOffsetY, - curRTWidth * renderWidthFactor, - curRTHeight * renderHeightFactor); - } - glstate.depthRange.set(0.0f, 1.0f); + out.viewportX = renderX + displayOffsetX; + out.viewportY = renderY + displayOffsetY; + out.viewportW = curRTWidth * renderWidthFactor; + out.viewportH = curRTHeight * renderHeightFactor; + out.depthRangeMin = 0.0f; + out.depthRangeMax = 1.0f; } else { // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. float vpXScale = gstate.getViewportXScale(); @@ -902,20 +923,20 @@ void TransformDrawEngine::ApplyDrawState(int prim) { gstate_c.vpHeightScale = hScale; gstate_c.vpXOffset = xOffset; gstate_c.vpYOffset = yOffset; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + out.dirtyProj = true; } - if (useBufferedRendering) { - glstate.viewport.set(left + displayOffsetX, displayOffsetY + top, right - left, bottom - top); - } else { - glstate.viewport.set(left + displayOffsetX, displayOffsetY + (renderHeight - bottom), right - left, bottom - top); - } + out.viewportX = left + displayOffsetX; + out.viewportY = top + displayOffsetY; + out.viewportW = right - left; + out.viewportH = bottom - top; float zScale = gstate.getViewportZScale(); float zCenter = gstate.getViewportZCenter(); float depthRangeMin = zCenter - zScale; float depthRangeMax = zCenter + zScale; - glstate.depthRange.set(depthRangeMin * (1.0f / 65535.0f), depthRangeMax * (1.0f / 65535.0f)); + out.depthRangeMin = depthRangeMin * (1.0f / 65535.0f); + out.depthRangeMax = depthRangeMax * (1.0f / 65535.0f); #ifndef MOBILE_DEVICE float minz = gstate.getDepthRangeMin(); From a6ced1d0b4d33a2e5ce495598ee187a45595306d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 8 Nov 2015 18:13:34 +0100 Subject: [PATCH 2/7] Move viewport/stencil conversion out into GPUStateUtils.cpp, use it in D3D as well. D3D bugfixes. Not perfect yet. --- GPU/Common/GPUStateUtils.cpp | 186 +++++++++++++++++++++++++++++++ GPU/Common/GPUStateUtils.h | 18 +++ GPU/Directx9/StateMappingDX9.cpp | 165 +++++---------------------- GPU/GLES/StateMapping.cpp | 181 ------------------------------ 4 files changed, 229 insertions(+), 321 deletions(-) diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 8fe1f13ec7..9c39beacfb 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -1,10 +1,31 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + #include "Common/StringUtils.h" #include "Core/Config.h" +#include "Core/System.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" #include "GPU/Common/ShaderId.h" #include "GPU/Common/VertexDecoderCommon.h" +#include "GPU/Common/FramebufferCommon.h" #include "GPU/Common/GPUStateUtils.h" @@ -386,3 +407,168 @@ LogicOpReplaceType ReplaceLogicOpType() { } return LOGICOPTYPE_NORMAL; } + + +void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) { + bool throughmode = gstate.isModeThrough(); + out.dirtyProj = false; + + float renderWidthFactor, renderHeightFactor; + float renderX = 0.0f, renderY = 0.0f; + float displayOffsetX, displayOffsetY; + if (useBufferedRendering) { + displayOffsetX = 0.0f; + displayOffsetY = 0.0f; + renderWidthFactor = (float)renderWidth / (float)bufferWidth; + renderHeightFactor = (float)renderHeight / (float)bufferHeight; + } else { + float pixelW = PSP_CoreParameter().pixelWidth; + float pixelH = PSP_CoreParameter().pixelHeight; + CenterDisplayOutputRect(&displayOffsetX, &displayOffsetY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH, ROTATION_LOCKED_HORIZONTAL); + renderWidthFactor = renderWidth / 480.0f; + renderHeightFactor = renderHeight / 272.0f; + } + + renderX += gstate_c.curRTOffsetX * renderWidthFactor; + + // Scissor + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); + int scissorX2 = gstate.getScissorX2() + 1; + int scissorY2 = gstate.getScissorY2() + 1; + + // This is a bit of a hack as the render buffer isn't always that size + // We always scissor on non-buffered so that clears don't spill outside the frame. + if (useBufferedRendering && scissorX1 == 0 && scissorY1 == 0 + && scissorX2 >= (int)gstate_c.curRTWidth + && scissorY2 >= (int)gstate_c.curRTHeight) { + out.scissorEnable = false; + } else { + out.scissorEnable = true; + out.scissorX = renderX + displayOffsetX + scissorX1 * renderWidthFactor; + out.scissorY = renderY + displayOffsetY + scissorY1 * renderHeightFactor; + out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor; + out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor; + } + + int curRTWidth = gstate_c.curRTWidth; + int curRTHeight = gstate_c.curRTHeight; + + float offsetX = gstate.getOffsetX(); + float offsetY = gstate.getOffsetY(); + + if (throughmode) { + // No viewport transform here. Let's experiment with using region. + out.viewportX = renderX + displayOffsetX; + out.viewportY = renderY + displayOffsetY; + out.viewportW = curRTWidth * renderWidthFactor; + out.viewportH = curRTHeight * renderHeightFactor; + out.depthRangeMin = 0.0f; + out.depthRangeMax = 1.0f; + } else { + // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. + float vpXScale = gstate.getViewportXScale(); + float vpXCenter = gstate.getViewportXCenter(); + float vpYScale = gstate.getViewportYScale(); + float vpYCenter = gstate.getViewportYCenter(); + + // The viewport transform appears to go like this: + // Xscreen = -offsetX + vpXCenter + vpXScale * Xview + // Yscreen = -offsetY + vpYCenter + vpYScale * Yview + // Zscreen = vpZCenter + vpZScale * Zview + + // The viewport is normally centered at 2048,2048 but can also be centered at other locations. + // Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover + // the desired screen area ([0-480)x[0-272)), so 1808,1912. + + // This means that to get the analogue glViewport we must: + float vpX0 = vpXCenter - offsetX - fabsf(vpXScale); + float vpY0 = vpYCenter - offsetY - fabsf(vpYScale); + gstate_c.vpWidth = vpXScale * 2.0f; + gstate_c.vpHeight = vpYScale * 2.0f; + + float vpWidth = fabsf(gstate_c.vpWidth); + float vpHeight = fabsf(gstate_c.vpHeight); + + // This multiplication should probably be done after viewport clipping. Would let us very slightly simplify the clipping logic? + vpX0 *= renderWidthFactor; + vpY0 *= renderHeightFactor; + vpWidth *= renderWidthFactor; + vpHeight *= renderHeightFactor; + + // We used to apply the viewport here via glstate, but there are limits which vary by driver. + // This may mean some games won't work, or at least won't work at higher render resolutions. + // So we apply it in the shader instead. + float left = renderX + vpX0; + float top = renderY + vpY0; + float right = left + vpWidth; + float bottom = top + vpHeight; + + float wScale = 1.0f; + float xOffset = 0.0f; + float hScale = 1.0f; + float yOffset = 0.0f; + + // If we're within the bounds, we want clipping the viewport way. So leave it be. + if (left < 0.0f || right > renderWidth) { + float overageLeft = std::max(-left, 0.0f); + float overageRight = std::max(right - renderWidth, 0.0f); + // Our center drifted by the difference in overages. + float drift = overageRight - overageLeft; + + left += overageLeft; + right -= overageRight; + + wScale = vpWidth / (right - left); + xOffset = drift / (right - left); + } + + if (top < 0.0f || bottom > renderHeight) { + float overageTop = std::max(-top, 0.0f); + float overageBottom = std::max(bottom - renderHeight, 0.0f); + // Our center drifted by the difference in overages. + float drift = overageBottom - overageTop; + + top += overageTop; + bottom -= overageBottom; + + hScale = vpHeight / (bottom - top); + yOffset = drift / (bottom - top); + } + + bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; + bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; + if (scaleChanged || offsetChanged) { + gstate_c.vpWidthScale = wScale; + gstate_c.vpHeightScale = hScale; + gstate_c.vpXOffset = xOffset; + gstate_c.vpYOffset = yOffset; + out.dirtyProj = true; + } + + out.viewportX = left + displayOffsetX; + out.viewportY = top + displayOffsetY; + out.viewportW = right - left; + out.viewportH = bottom - top; + + float zScale = gstate.getViewportZScale(); + float zCenter = gstate.getViewportZCenter(); + float depthRangeMin = zCenter - zScale; + float depthRangeMax = zCenter + zScale; + out.depthRangeMin = depthRangeMin * (1.0f / 65535.0f); + out.depthRangeMax = depthRangeMax * (1.0f / 65535.0f); + +#ifndef MOBILE_DEVICE + float minz = gstate.getDepthRangeMin(); + float maxz = gstate.getDepthRangeMax(); + if ((minz > depthRangeMin && minz > depthRangeMax) || (maxz < depthRangeMin && maxz < depthRangeMax)) { + WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range in test - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); + } else if ((gstate.clipEnable & 1) == 0) { + // TODO: Need to test whether clipEnable should even affect depth or not. + if ((minz < depthRangeMin && minz < depthRangeMax) || (maxz > depthRangeMin && maxz > depthRangeMax)) { + WARN_LOG_REPORT_ONCE(znoclip, G3D, "Unsupported depth range in test without clipping - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); + } + } +#endif + } +} diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index 6acf4c7e36..00f33c5b72 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -47,3 +47,21 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend, GEBufferFormat bu bool CanUseHardwareTransform(int prim); LogicOpReplaceType ReplaceLogicOpType(); + + +// Common representation, should be able to set this directly with any modern API. +struct ViewportAndScissor { + bool scissorEnable; + int scissorX; + int scissorY; + int scissorW; + int scissorH; + float viewportX; + float viewportY; + float viewportW; + float viewportH; + float depthRangeMin; + float depthRangeMax; + bool dirtyProj; +}; +void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out); diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index fe815d2a60..32a5884411 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -284,6 +284,7 @@ void TransformDrawEngineDX9::ApplyBlendState() { ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowShaderBlend, gstate.FrameBufFormat()); ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend); + bool usePreSrc = false; switch (replaceBlend) { @@ -650,152 +651,36 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { } } - float renderWidthFactor, renderHeightFactor; - float renderWidth, renderHeight; - float renderX, renderY; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; - if (useBufferedRendering) { - renderX = 0.0f; - renderY = 0.0f; - renderWidth = framebufferManager_->GetRenderWidth(); - renderHeight = framebufferManager_->GetRenderHeight(); - renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetBufferWidth(); - renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetBufferHeight(); - } else { - float pixelW = PSP_CoreParameter().pixelWidth; - float pixelH = PSP_CoreParameter().pixelHeight; - CenterDisplayOutputRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH, ROTATION_LOCKED_HORIZONTAL); - renderWidthFactor = renderWidth / 480.0f; - renderHeightFactor = renderHeight / 272.0f; - } - renderX += gstate_c.curRTOffsetX * renderWidthFactor; + ViewportAndScissor vpAndScissor; + ConvertViewportAndScissor(useBufferedRendering, + framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(), + framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(), + vpAndScissor); - bool throughmode = gstate.isModeThrough(); - - // Scissor - int scissorX1 = gstate.getScissorX1(); - int scissorY1 = gstate.getScissorY1(); - int scissorX2 = gstate.getScissorX2() + 1; - int scissorY2 = gstate.getScissorY2() + 1; - - // This is a bit of a hack as the render buffer isn't always that size - if (scissorX1 == 0 && scissorY1 == 0 - && scissorX2 >= (int) gstate_c.curRTWidth - && scissorY2 >= (int) gstate_c.curRTHeight) { - dxstate.scissorTest.disable(); - } else { + if (vpAndScissor.scissorEnable) { dxstate.scissorTest.enable(); - dxstate.scissorRect.set( - renderX + scissorX1 * renderWidthFactor, - renderY + scissorY1 * renderHeightFactor, - renderX + scissorX2 * renderWidthFactor, - renderY + scissorY2 * renderHeightFactor); + dxstate.scissorRect.set(vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorX + vpAndScissor.scissorW, vpAndScissor.scissorY + vpAndScissor.scissorH); + } else { + dxstate.scissorTest.disable(); } - int curRTWidth = gstate_c.curRTWidth; - int curRTHeight = gstate_c.curRTHeight; - - float offsetX = gstate.getOffsetX(); - float offsetY = gstate.getOffsetY(); - - if (throughmode) { - dxstate.viewport.set( - renderX, - renderY, - curRTWidth * renderWidthFactor, - curRTHeight * renderHeightFactor, - 0.f, 1.f); - } else { - float vpXScale = gstate.getViewportXScale(); - float vpXCenter = gstate.getViewportXCenter(); - float vpYScale = gstate.getViewportYScale(); - float vpYCenter = gstate.getViewportYCenter(); - - // The viewport transform appears to go like this: - // Xscreen = -offsetX + vpXCenter + vpXScale * Xview - // Yscreen = -offsetY + vpYCenter + vpYScale * Yview - // Zscreen = vpZCenter + vpZScale * Zview - - // This means that to get the analogue glViewport we must: - float vpX0 = vpXCenter - offsetX - fabsf(vpXScale); - float vpY0 = vpYCenter - offsetY - fabsf(vpYScale); - gstate_c.vpWidth = vpXScale * 2.0f; - gstate_c.vpHeight = vpYScale * 2.0f; - - float vpWidth = fabsf(gstate_c.vpWidth); - float vpHeight = fabsf(gstate_c.vpHeight); - - vpX0 *= renderWidthFactor; - vpY0 *= renderHeightFactor; - vpWidth *= renderWidthFactor; - vpHeight *= renderHeightFactor; - - float zScale = gstate.getViewportZScale(); - float zCenter = gstate.getViewportZCenter(); - - // Note - We lose the sign of the zscale here. But we keep it in gstate_c.vpDepth. - // That variable is only check for sign later so the multiplication by 2 isn't really necessary. - - // It's unclear why we need this Z offset to match OpenGL, but this checks out in multiple games. - float depthRangeMin = (zCenter - fabsf(zScale)) * (1.0f / 65535.0f); - float depthRangeMax = (zCenter + fabsf(zScale)) * (1.0f / 65535.0f); - gstate_c.vpDepth = zScale * (2.0f / 65335.0f); - - // D3D doesn't like viewports partially outside the target, so we - // apply the viewport partially in the shader. - float left = renderX + vpX0; - float top = renderY + vpY0; - float right = left + vpWidth; - float bottom = top + vpHeight; - - float wScale = 1.0f; - float xOffset = 0.0f; - float hScale = 1.0f; - float yOffset = 0.0f; - - // If we're within the bounds, we want clipping the viewport way. So leave it be. - if (left < 0.0f || right > renderWidth) { - float overageLeft = std::max(-left, 0.0f); - float overageRight = std::max(right - renderWidth, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageRight - overageLeft; - - left += overageLeft; - right -= overageRight; - - wScale = vpWidth / (right - left); - xOffset = drift / (right - left); - } - - if (top < 0.0f || bottom > renderHeight) { - float overageTop = std::max(-top, 0.0f); - float overageBottom = std::max(bottom - renderHeight, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageTop - overageBottom; - - top += overageTop; - bottom -= overageBottom; - - hScale = vpHeight / (bottom - top); - yOffset = drift / (bottom - top); - } - - depthRangeMin = std::max(0.0f, depthRangeMin); - depthRangeMax = std::min(1.0f, depthRangeMax); - - bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; - bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; - if (scaleChanged || offsetChanged) - { - gstate_c.vpWidthScale = wScale; - gstate_c.vpHeightScale = hScale; - gstate_c.vpXOffset = xOffset; - gstate_c.vpYOffset = yOffset; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - - dxstate.viewport.set(left, top, right - left, bottom - top, depthRangeMin, depthRangeMax); + // Direct3D can't handle negative depth ranges, so we fix it in the projection matrix. + float depthMin = vpAndScissor.depthRangeMin; + float depthMax = vpAndScissor.depthRangeMax; + if (gstate_c.vpDepth != depthMax - depthMin) { + gstate_c.vpDepth = depthMax - depthMin; + vpAndScissor.dirtyProj = true; + } + if (depthMin > depthMax) { + std::swap(depthMin, depthMax); + } + if (depthMin < 0.0f) depthMin = 0.0f; + if (depthMax > 1.0f) depthMax = 1.0f; + dxstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH, depthMin, depthMax); + if (vpAndScissor.dirtyProj) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); } } diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 920eb81d85..a4908a2410 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -585,23 +585,6 @@ void TransformDrawEngine::ApplyBlendState() { } } -// Common representation, should be able to set this directly with any modern API. -struct ViewportAndScissor { - bool scissorEnable; - int scissorX; - int scissorY; - int scissorW; - int scissorH; - float viewportX; - float viewportY; - float viewportW; - float viewportH; - float depthRangeMin; - float depthRangeMax; - bool dirtyProj; -}; -void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out); - void TransformDrawEngine::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. @@ -789,170 +772,6 @@ void TransformDrawEngine::ApplyDrawState(int prim) { } } -void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) { - bool throughmode = gstate.isModeThrough(); - out.dirtyProj = false; - - float renderWidthFactor, renderHeightFactor; - float renderX = 0.0f, renderY = 0.0f; - float displayOffsetX, displayOffsetY; - if (useBufferedRendering) { - displayOffsetX = 0.0f; - displayOffsetY = 0.0f; - renderWidthFactor = (float)renderWidth / (float)bufferWidth; - renderHeightFactor = (float)renderHeight / (float)bufferHeight; - } else { - float pixelW = PSP_CoreParameter().pixelWidth; - float pixelH = PSP_CoreParameter().pixelHeight; - CenterDisplayOutputRect(&displayOffsetX, &displayOffsetY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH, ROTATION_LOCKED_HORIZONTAL); - renderWidthFactor = renderWidth / 480.0f; - renderHeightFactor = renderHeight / 272.0f; - } - - renderX += gstate_c.curRTOffsetX * renderWidthFactor; - - // Scissor - int scissorX1 = gstate.getScissorX1(); - int scissorY1 = gstate.getScissorY1(); - int scissorX2 = gstate.getScissorX2() + 1; - int scissorY2 = gstate.getScissorY2() + 1; - - // This is a bit of a hack as the render buffer isn't always that size - // We always scissor on non-buffered so that clears don't spill outside the frame. - if (useBufferedRendering && scissorX1 == 0 && scissorY1 == 0 - && scissorX2 >= (int)gstate_c.curRTWidth - && scissorY2 >= (int)gstate_c.curRTHeight) { - out.scissorEnable = false; - } else { - out.scissorEnable = true; - out.scissorX = renderX + displayOffsetX + scissorX1 * renderWidthFactor; - out.scissorY = renderY + displayOffsetY + scissorY1 * renderHeightFactor; - out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor; - out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor; - } - - int curRTWidth = gstate_c.curRTWidth; - int curRTHeight = gstate_c.curRTHeight; - - float offsetX = gstate.getOffsetX(); - float offsetY = gstate.getOffsetY(); - - if (throughmode) { - // No viewport transform here. Let's experiment with using region. - out.viewportX = renderX + displayOffsetX; - out.viewportY = renderY + displayOffsetY; - out.viewportW = curRTWidth * renderWidthFactor; - out.viewportH = curRTHeight * renderHeightFactor; - out.depthRangeMin = 0.0f; - out.depthRangeMax = 1.0f; - } else { - // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. - float vpXScale = gstate.getViewportXScale(); - float vpXCenter = gstate.getViewportXCenter(); - float vpYScale = gstate.getViewportYScale(); - float vpYCenter = gstate.getViewportYCenter(); - - // The viewport transform appears to go like this: - // Xscreen = -offsetX + vpXCenter + vpXScale * Xview - // Yscreen = -offsetY + vpYCenter + vpYScale * Yview - // Zscreen = vpZCenter + vpZScale * Zview - - // The viewport is normally centered at 2048,2048 but can also be centered at other locations. - // Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover - // the desired screen area ([0-480)x[0-272)), so 1808,1912. - - // This means that to get the analogue glViewport we must: - float vpX0 = vpXCenter - offsetX - fabsf(vpXScale); - float vpY0 = vpYCenter - offsetY - fabsf(vpYScale); // Need to account for sign of Y - gstate_c.vpWidth = vpXScale * 2.0f; - gstate_c.vpHeight = vpYScale * 2.0f; - - float vpWidth = fabsf(gstate_c.vpWidth); - float vpHeight = fabsf(gstate_c.vpHeight); - - // This multiplication should probably be done after viewport clipping. Would let us very slightly simplify the clipping logic? - vpX0 *= renderWidthFactor; - vpY0 *= renderHeightFactor; - vpWidth *= renderWidthFactor; - vpHeight *= renderHeightFactor; - - // We used to apply the viewport here via glstate, but there are limits which vary by driver. - // This may mean some games won't work, or at least won't work at higher render resolutions. - // So we apply it in the shader instead. - float left = renderX + vpX0; - float top = renderY + vpY0; - float right = left + vpWidth; - float bottom = top + vpHeight; - - float wScale = 1.0f; - float xOffset = 0.0f; - float hScale = 1.0f; - float yOffset = 0.0f; - - // If we're within the bounds, we want clipping the viewport way. So leave it be. - if (left < 0.0f || right > renderWidth) { - float overageLeft = std::max(-left, 0.0f); - float overageRight = std::max(right - renderWidth, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageRight - overageLeft; - - left += overageLeft; - right -= overageRight; - - wScale = vpWidth / (right - left); - xOffset = drift / (right - left); - } - - if (top < 0.0f || bottom > renderHeight) { - float overageTop = std::max(-top, 0.0f); - float overageBottom = std::max(bottom - renderHeight, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageBottom - overageTop; - - top += overageTop; - bottom -= overageBottom; - - hScale = vpHeight / (bottom - top); - yOffset = drift / (bottom - top); - } - - bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; - bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; - if (scaleChanged || offsetChanged) { - gstate_c.vpWidthScale = wScale; - gstate_c.vpHeightScale = hScale; - gstate_c.vpXOffset = xOffset; - gstate_c.vpYOffset = yOffset; - out.dirtyProj = true; - } - - out.viewportX = left + displayOffsetX; - out.viewportY = top + displayOffsetY; - out.viewportW = right - left; - out.viewportH = bottom - top; - - float zScale = gstate.getViewportZScale(); - float zCenter = gstate.getViewportZCenter(); - float depthRangeMin = zCenter - zScale; - float depthRangeMax = zCenter + zScale; - out.depthRangeMin = depthRangeMin * (1.0f / 65535.0f); - out.depthRangeMax = depthRangeMax * (1.0f / 65535.0f); - -#ifndef MOBILE_DEVICE - float minz = gstate.getDepthRangeMin(); - float maxz = gstate.getDepthRangeMax(); - if ((minz > depthRangeMin && minz > depthRangeMax) || (maxz < depthRangeMin && maxz < depthRangeMax)) { - WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range in test - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); - } else if ((gstate.clipEnable & 1) == 0) { - // TODO: Need to test whether clipEnable should even affect depth or not. - if ((minz < depthRangeMin && minz < depthRangeMax) || (maxz > depthRangeMin && maxz > depthRangeMax)) { - WARN_LOG_REPORT_ONCE(znoclip, G3D, "Unsupported depth range in test without clipping - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); - } - } -#endif - } -} - void TransformDrawEngine::ApplyDrawStateLate() { // At this point, we know if the vertices are full alpha or not. // TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? From 5667f348055c59eaee51a2498d0a0b1c3de3be26 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 Nov 2015 19:35:25 +0100 Subject: [PATCH 3/7] Invert proj matrix offset properly in D3D. --- GPU/Directx9/ShaderManagerDX9.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index d02b8c7982..c1243b2f4f 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -255,6 +255,11 @@ static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY float yoff = -0.5f / gstate_c.curRTRenderHeight; yoff = gstate_c.vpYOffset + (invertedY ? yoff : -yoff); + if (invertedX) + xoff = -xoff; + if (invertedY) + yoff = -yoff; + in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, invertedZ ? -0.5 : 0.5f)); } From 5a8e86e8bab4f5e1edddacff333c4956812fdeb3 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 9 Nov 2015 21:57:48 +0100 Subject: [PATCH 4/7] Don't mess with vpDepth in through mode. Fixes glitches in Wipeout Pulse. --- GPU/Directx9/StateMappingDX9.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 32a5884411..98c9479c23 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -666,18 +666,22 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { dxstate.scissorTest.disable(); } - // Direct3D can't handle negative depth ranges, so we fix it in the projection matrix. float depthMin = vpAndScissor.depthRangeMin; float depthMax = vpAndScissor.depthRangeMax; - if (gstate_c.vpDepth != depthMax - depthMin) { - gstate_c.vpDepth = depthMax - depthMin; - vpAndScissor.dirtyProj = true; + + if (!gstate.isModeThrough()) { + // Direct3D can't handle negative depth ranges, so we fix it in the projection matrix. + if (gstate_c.vpDepth != depthMax - depthMin) { + gstate_c.vpDepth = depthMax - depthMin; + vpAndScissor.dirtyProj = true; + } + if (depthMin > depthMax) { + std::swap(depthMin, depthMax); + } + if (depthMin < 0.0f) depthMin = 0.0f; + if (depthMax > 1.0f) depthMax = 1.0f; } - if (depthMin > depthMax) { - std::swap(depthMin, depthMax); - } - if (depthMin < 0.0f) depthMin = 0.0f; - if (depthMax > 1.0f) depthMax = 1.0f; + dxstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH, depthMin, depthMax); if (vpAndScissor.dirtyProj) { shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); From 7e3cd987cd0715fc2f0e9689bcbe8a2cc1006935 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 Nov 2015 14:47:43 +0100 Subject: [PATCH 5/7] Fixes to D3D viewport management. For some reason, still having strange driver-bug-ish problems in unbuffered... --- GPU/Directx9/FramebufferDX9.cpp | 30 +++++++++++++----------------- GPU/Directx9/GPU_DX9.cpp | 3 +-- GPU/Directx9/StencilBufferDX9.cpp | 4 ++-- GPU/Directx9/TextureCacheDX9.cpp | 10 +--------- GPU/Directx9/helper/dx_state.cpp | 3 +-- GPU/Directx9/helper/dx_state.h | 4 ++-- GPU/Directx9/helper/global.cpp | 11 +++++++++++ GPU/Directx9/helper/global.h | 2 ++ 8 files changed, 33 insertions(+), 34 deletions(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 7bd9c518d1..d8f58cb139 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -200,16 +200,17 @@ namespace DX9 { void FramebufferManagerDX9::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) { if (useBufferedRendering_ && vfb && vfb->fbo) { fbo_bind_as_render_target(vfb->fbo); - dxstate.viewport.set(0, 0, vfb->renderWidth, vfb->renderHeight); + DXSetViewport(0, 0, vfb->renderWidth, vfb->renderHeight); } else { float x, y, w, h; CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL); - dxstate.viewport.set(x, y, w, h); + DXSetViewport(x, y, w, h); } MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); DisableState(); DrawActiveTexture(drawPixelsTex_, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, 0.0f, 0.0f, 1.0f, 1.0f, ROTATION_LOCKED_HORIZONTAL); textureCache_->ForgetLastTexture(); + dxstate.viewport.restore(); } void FramebufferManagerDX9::DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { @@ -515,20 +516,14 @@ namespace DX9 { shaderManager_->DirtyLastShader(); pD3Ddevice->SetTexture(0, nullptr); - D3DVIEWPORT9 vp; - vp.MinZ = 0; - vp.MaxZ = 1; - vp.X = 0; - vp.Y = 0; - vp.Width = vfb->renderWidth; - vp.Height = vfb->renderHeight; - pD3Ddevice->SetViewport(&vp); + DXSetViewport(0, 0, vfb->renderWidth, vfb->renderHeight); // This should clear stencil and alpha without changing the other colors. HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); if (FAILED(hr)) { ERROR_LOG_REPORT(G3D, "ReformatFramebufferFrom() failed: %08x", hr); } + dxstate.viewport.restore(); } RebindFramebuffer(); @@ -682,9 +677,10 @@ namespace DX9 { } void FramebufferManagerDX9::CopyDisplayToOutput() { - fbo_unbind(); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + if (useBufferedRendering_) { + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + } currentRenderVfb_ = 0; u32 offsetX = 0; @@ -794,7 +790,7 @@ namespace DX9 { HRESULT hr = fbo_blit_color(vfb->fbo, &srcRect, nullptr, &dstRect, g_Config.iBufFilter == SCALE_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT); if (FAILED(hr)) { ERROR_LOG_REPORT_ONCE(blit_fail, G3D, "fbo_blit_color failed on display: %08x", hr); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // These are in the output display coordinates if (g_Config.iBufFilter == SCALE_LINEAR) { dxstate.texMagFilter.set(D3DTEXF_LINEAR); @@ -814,7 +810,7 @@ namespace DX9 { fbo_bind_as_render_target(extraFBOs_[0]); int fbo_w, fbo_h; fbo_get_dimensions(extraFBOs_[0], &fbo_w, &fbo_h); - dxstate.viewport.set(0, 0, fbo_w, fbo_h); + DXSetViewport(0, 0, fbo_w, fbo_h); DrawActiveTexture(colorTexture, 0, 0, fbo_w, fbo_h, fbo_w, fbo_h, true, 1.0f, 1.0f, postShaderProgram_); fbo_unbind(); @@ -826,18 +822,19 @@ namespace DX9 { return; } colorTexture = fbo_get_color_texture(extraFBOs_[0]); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // These are in the output display coordinates DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height); } else { // Use post-shader, but run shader at output resolution. - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // These are in the output display coordinates DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height, postShaderProgram_); } */ pD3Ddevice->SetTexture(0, NULL); } + dxstate.viewport.restore(); } void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { @@ -1107,7 +1104,6 @@ namespace DX9 { void FramebufferManagerDX9::EndFrame() { if (resized_) { DestroyAllFBOs(); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // Actually, auto mode should be more granular... // Round up to a zoom factor for the render size. int zoom = g_Config.iInternalResolution; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 2a7493458d..777405ef99 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -509,7 +509,6 @@ void DIRECTX9_GPU::InitClearInternal() { dxstate.colorMask.set(true, true, true, true); pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.f, 0); } - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); } void DIRECTX9_GPU::DumpNextFrame() { @@ -521,7 +520,7 @@ void DIRECTX9_GPU::BeginFrame() { } void DIRECTX9_GPU::ReapplyGfxStateInternal() { - DX9::dxstate.Restore(); + dxstate.Restore(); GPUCommon::ReapplyGfxStateInternal(); } diff --git a/GPU/Directx9/StencilBufferDX9.cpp b/GPU/Directx9/StencilBufferDX9.cpp index 11cfa72069..84ca55bc42 100644 --- a/GPU/Directx9/StencilBufferDX9.cpp +++ b/GPU/Directx9/StencilBufferDX9.cpp @@ -221,7 +221,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer if (dstBuffer->fbo) { fbo_bind_as_render_target(dstBuffer->fbo); } - dxstate.viewport.set(0, 0, w, h); + DXSetViewport(0, 0, w, h); MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight); @@ -279,7 +279,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer } } dxstate.stencilMask.set(0xFF); - + dxstate.viewport.restore(); RebindFramebuffer(); return true; } diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index 408072a542..d25c1143ad 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -1042,15 +1042,7 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame pD3Ddevice->SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); - D3DVIEWPORT9 vp; - vp.MinZ = 0; - vp.MaxZ = 1; - vp.X = 0; - vp.Y = 0; - vp.Width = framebuffer->renderWidth; - vp.Height = framebuffer->renderHeight; - pD3Ddevice->SetViewport(&vp); - + DXSetViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight); HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, (3 + 2) * sizeof(float)); if (FAILED(hr)) { ERROR_LOG_REPORT(G3D, "Depal render failed: %08x", hr); diff --git a/GPU/Directx9/helper/dx_state.cpp b/GPU/Directx9/helper/dx_state.cpp index ccfdc821b4..b5bc4fdf7a 100644 --- a/GPU/Directx9/helper/dx_state.cpp +++ b/GPU/Directx9/helper/dx_state.cpp @@ -37,8 +37,7 @@ void DirectXState::Restore() { colorMask.restore(); count++; - // why not? - // viewport.restore(); count++; + viewport.restore(); count++; alphaTest.restore(); count++; alphaTestFunc.restore(); count++; diff --git a/GPU/Directx9/helper/dx_state.h b/GPU/Directx9/helper/dx_state.h index 156abf0abf..3a74f669eb 100644 --- a/GPU/Directx9/helper/dx_state.h +++ b/GPU/Directx9/helper/dx_state.h @@ -342,6 +342,7 @@ private: class StateVp { D3DVIEWPORT9 viewport; public: + StateVp() { memset(&viewport, 0, sizeof(viewport)); } inline void set(int x, int y, int w, int h, float n = 0.f, float f = 1.f) { D3DVIEWPORT9 newviewport; newviewport.X = x; @@ -350,8 +351,7 @@ private: newviewport.Height = h; newviewport.MinZ = n; newviewport.MaxZ = f; - - if (memcmp(&viewport, &newviewport, sizeof(viewport))) { + if (memcmp(&viewport, &newviewport, sizeof(viewport)) != 0) { viewport = newviewport; restore(); } diff --git a/GPU/Directx9/helper/global.cpp b/GPU/Directx9/helper/global.cpp index d814cb3211..351e90efc7 100644 --- a/GPU/Directx9/helper/global.cpp +++ b/GPU/Directx9/helper/global.cpp @@ -11,6 +11,17 @@ LPDIRECT3DDEVICE9 pD3Ddevice = NULL; LPDIRECT3DDEVICE9EX pD3DdeviceEx = NULL; LPDIRECT3D9 pD3D = NULL; +void DXSetViewport(float x, float y, float w, float h, float minZ, float maxZ) { + D3DVIEWPORT9 vp; + vp.X = (DWORD)x; + vp.Y = (DWORD)y; + vp.Width = (DWORD)w; + vp.Height = (DWORD)h; + vp.MinZ = minZ; + vp.MaxZ = maxZ; + pD3Ddevice->SetViewport(&vp); +} + static const char * vscode = "struct VS_IN {\n" " float4 ObjPos : POSITION;\n" diff --git a/GPU/Directx9/helper/global.h b/GPU/Directx9/helper/global.h index 4eebbca15a..ef6a8eb455 100644 --- a/GPU/Directx9/helper/global.h +++ b/GPU/Directx9/helper/global.h @@ -31,6 +31,8 @@ bool CompileVertexShader(const char *code, LPDIRECT3DVERTEXSHADER9 *pShader, ID3 void DestroyShaders(); void DirectxInit(HWND window); +void DXSetViewport(float x, float y, float w, float h, float minZ = 0.0f, float maxZ = 1.0f); + #define D3DBLEND_UNK D3DBLEND_FORCE_DWORD }; From 45da00d381691467440c08dc59446f2fe5b24ffe Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 Nov 2015 15:22:20 +0100 Subject: [PATCH 6/7] Fix viewport offset in gl non-buffered --- GPU/GLES/ShaderManager.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 49c663a3f7..016dce8d25 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -366,7 +366,12 @@ static void SetMatrix4x3(int uniform, const float *m4x3) { } static inline void ScaleProjMatrix(Matrix4x4 &in) { - const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, 0.0f); + float yOffset = gstate_c.vpYOffset; + if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { + // GL upside down is a pain as usual. + yOffset = -yOffset; + } + const Vec3 trans(gstate_c.vpXOffset, yOffset, 0.0f); const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, 1.0); in.translateAndScale(trans, scale); } From e6b39d65d39f7ace55746c48bbd4ce4407015cb2 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 12 Nov 2015 15:26:06 +0100 Subject: [PATCH 7/7] Apply unknown's workaround for dx viewport issues - additionally, we shouldn't even have to call the function in non-buffered.. --- GPU/Directx9/FramebufferDX9.cpp | 4 +++- GPU/Directx9/helper/dx_fbo.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index d8f58cb139..3ea01aa94c 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -1161,7 +1161,9 @@ namespace DX9 { } void FramebufferManagerDX9::DecimateFBOs() { - fbo_unbind(); + if (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE) { + fbo_unbind(); + } currentRenderVfb_ = 0; bool updateVram = !(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE); diff --git a/GPU/Directx9/helper/dx_fbo.cpp b/GPU/Directx9/helper/dx_fbo.cpp index 87e17c0695..23692afaaa 100644 --- a/GPU/Directx9/helper/dx_fbo.cpp +++ b/GPU/Directx9/helper/dx_fbo.cpp @@ -115,6 +115,8 @@ void fbo_destroy(FBO *fbo) { void fbo_unbind() { pD3Ddevice->SetRenderTarget(0, deviceRTsurf); pD3Ddevice->SetDepthStencilSurface(deviceDSsurf); + dxstate.scissorRect.restore(); + dxstate.viewport.restore(); } void fbo_resolve(FBO *fbo) {