diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 8fe1f13ec7..9c39beacfb 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -1,10 +1,31 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + #include "Common/StringUtils.h" #include "Core/Config.h" +#include "Core/System.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" #include "GPU/Common/ShaderId.h" #include "GPU/Common/VertexDecoderCommon.h" +#include "GPU/Common/FramebufferCommon.h" #include "GPU/Common/GPUStateUtils.h" @@ -386,3 +407,168 @@ LogicOpReplaceType ReplaceLogicOpType() { } return LOGICOPTYPE_NORMAL; } + + +void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) { + bool throughmode = gstate.isModeThrough(); + out.dirtyProj = false; + + float renderWidthFactor, renderHeightFactor; + float renderX = 0.0f, renderY = 0.0f; + float displayOffsetX, displayOffsetY; + if (useBufferedRendering) { + displayOffsetX = 0.0f; + displayOffsetY = 0.0f; + renderWidthFactor = (float)renderWidth / (float)bufferWidth; + renderHeightFactor = (float)renderHeight / (float)bufferHeight; + } else { + float pixelW = PSP_CoreParameter().pixelWidth; + float pixelH = PSP_CoreParameter().pixelHeight; + CenterDisplayOutputRect(&displayOffsetX, &displayOffsetY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH, ROTATION_LOCKED_HORIZONTAL); + renderWidthFactor = renderWidth / 480.0f; + renderHeightFactor = renderHeight / 272.0f; + } + + renderX += gstate_c.curRTOffsetX * renderWidthFactor; + + // Scissor + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); + int scissorX2 = gstate.getScissorX2() + 1; + int scissorY2 = gstate.getScissorY2() + 1; + + // This is a bit of a hack as the render buffer isn't always that size + // We always scissor on non-buffered so that clears don't spill outside the frame. + if (useBufferedRendering && scissorX1 == 0 && scissorY1 == 0 + && scissorX2 >= (int)gstate_c.curRTWidth + && scissorY2 >= (int)gstate_c.curRTHeight) { + out.scissorEnable = false; + } else { + out.scissorEnable = true; + out.scissorX = renderX + displayOffsetX + scissorX1 * renderWidthFactor; + out.scissorY = renderY + displayOffsetY + scissorY1 * renderHeightFactor; + out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor; + out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor; + } + + int curRTWidth = gstate_c.curRTWidth; + int curRTHeight = gstate_c.curRTHeight; + + float offsetX = gstate.getOffsetX(); + float offsetY = gstate.getOffsetY(); + + if (throughmode) { + // No viewport transform here. Let's experiment with using region. + out.viewportX = renderX + displayOffsetX; + out.viewportY = renderY + displayOffsetY; + out.viewportW = curRTWidth * renderWidthFactor; + out.viewportH = curRTHeight * renderHeightFactor; + out.depthRangeMin = 0.0f; + out.depthRangeMax = 1.0f; + } else { + // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. + float vpXScale = gstate.getViewportXScale(); + float vpXCenter = gstate.getViewportXCenter(); + float vpYScale = gstate.getViewportYScale(); + float vpYCenter = gstate.getViewportYCenter(); + + // The viewport transform appears to go like this: + // Xscreen = -offsetX + vpXCenter + vpXScale * Xview + // Yscreen = -offsetY + vpYCenter + vpYScale * Yview + // Zscreen = vpZCenter + vpZScale * Zview + + // The viewport is normally centered at 2048,2048 but can also be centered at other locations. + // Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover + // the desired screen area ([0-480)x[0-272)), so 1808,1912. + + // This means that to get the analogue glViewport we must: + float vpX0 = vpXCenter - offsetX - fabsf(vpXScale); + float vpY0 = vpYCenter - offsetY - fabsf(vpYScale); + gstate_c.vpWidth = vpXScale * 2.0f; + gstate_c.vpHeight = vpYScale * 2.0f; + + float vpWidth = fabsf(gstate_c.vpWidth); + float vpHeight = fabsf(gstate_c.vpHeight); + + // This multiplication should probably be done after viewport clipping. Would let us very slightly simplify the clipping logic? + vpX0 *= renderWidthFactor; + vpY0 *= renderHeightFactor; + vpWidth *= renderWidthFactor; + vpHeight *= renderHeightFactor; + + // We used to apply the viewport here via glstate, but there are limits which vary by driver. + // This may mean some games won't work, or at least won't work at higher render resolutions. + // So we apply it in the shader instead. + float left = renderX + vpX0; + float top = renderY + vpY0; + float right = left + vpWidth; + float bottom = top + vpHeight; + + float wScale = 1.0f; + float xOffset = 0.0f; + float hScale = 1.0f; + float yOffset = 0.0f; + + // If we're within the bounds, we want clipping the viewport way. So leave it be. + if (left < 0.0f || right > renderWidth) { + float overageLeft = std::max(-left, 0.0f); + float overageRight = std::max(right - renderWidth, 0.0f); + // Our center drifted by the difference in overages. + float drift = overageRight - overageLeft; + + left += overageLeft; + right -= overageRight; + + wScale = vpWidth / (right - left); + xOffset = drift / (right - left); + } + + if (top < 0.0f || bottom > renderHeight) { + float overageTop = std::max(-top, 0.0f); + float overageBottom = std::max(bottom - renderHeight, 0.0f); + // Our center drifted by the difference in overages. + float drift = overageBottom - overageTop; + + top += overageTop; + bottom -= overageBottom; + + hScale = vpHeight / (bottom - top); + yOffset = drift / (bottom - top); + } + + bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; + bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; + if (scaleChanged || offsetChanged) { + gstate_c.vpWidthScale = wScale; + gstate_c.vpHeightScale = hScale; + gstate_c.vpXOffset = xOffset; + gstate_c.vpYOffset = yOffset; + out.dirtyProj = true; + } + + out.viewportX = left + displayOffsetX; + out.viewportY = top + displayOffsetY; + out.viewportW = right - left; + out.viewportH = bottom - top; + + float zScale = gstate.getViewportZScale(); + float zCenter = gstate.getViewportZCenter(); + float depthRangeMin = zCenter - zScale; + float depthRangeMax = zCenter + zScale; + out.depthRangeMin = depthRangeMin * (1.0f / 65535.0f); + out.depthRangeMax = depthRangeMax * (1.0f / 65535.0f); + +#ifndef MOBILE_DEVICE + float minz = gstate.getDepthRangeMin(); + float maxz = gstate.getDepthRangeMax(); + if ((minz > depthRangeMin && minz > depthRangeMax) || (maxz < depthRangeMin && maxz < depthRangeMax)) { + WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range in test - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); + } else if ((gstate.clipEnable & 1) == 0) { + // TODO: Need to test whether clipEnable should even affect depth or not. + if ((minz < depthRangeMin && minz < depthRangeMax) || (maxz > depthRangeMin && maxz > depthRangeMax)) { + WARN_LOG_REPORT_ONCE(znoclip, G3D, "Unsupported depth range in test without clipping - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); + } + } +#endif + } +} diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index 6acf4c7e36..00f33c5b72 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -47,3 +47,21 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend, GEBufferFormat bu bool CanUseHardwareTransform(int prim); LogicOpReplaceType ReplaceLogicOpType(); + + +// Common representation, should be able to set this directly with any modern API. +struct ViewportAndScissor { + bool scissorEnable; + int scissorX; + int scissorY; + int scissorW; + int scissorH; + float viewportX; + float viewportY; + float viewportW; + float viewportH; + float depthRangeMin; + float depthRangeMax; + bool dirtyProj; +}; +void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out); diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 0883b4f1eb..e4b24d859a 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -200,16 +200,17 @@ namespace DX9 { void FramebufferManagerDX9::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) { if (useBufferedRendering_ && vfb && vfb->fbo) { fbo_bind_as_render_target(vfb->fbo); - dxstate.viewport.set(0, 0, vfb->renderWidth, vfb->renderHeight); + DXSetViewport(0, 0, vfb->renderWidth, vfb->renderHeight); } else { float x, y, w, h; CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL); - dxstate.viewport.set(x, y, w, h); + DXSetViewport(x, y, w, h); } MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); DisableState(); DrawActiveTexture(drawPixelsTex_, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, 0.0f, 0.0f, 1.0f, 1.0f, ROTATION_LOCKED_HORIZONTAL); textureCache_->ForgetLastTexture(); + dxstate.viewport.restore(); } void FramebufferManagerDX9::DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { @@ -515,20 +516,14 @@ namespace DX9 { shaderManager_->DirtyLastShader(); pD3Ddevice->SetTexture(0, nullptr); - D3DVIEWPORT9 vp; - vp.MinZ = 0; - vp.MaxZ = 1; - vp.X = 0; - vp.Y = 0; - vp.Width = vfb->renderWidth; - vp.Height = vfb->renderHeight; - pD3Ddevice->SetViewport(&vp); + DXSetViewport(0, 0, vfb->renderWidth, vfb->renderHeight); // This should clear stencil and alpha without changing the other colors. HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); if (FAILED(hr)) { ERROR_LOG_REPORT(G3D, "ReformatFramebufferFrom() failed: %08x", hr); } + dxstate.viewport.restore(); } RebindFramebuffer(); @@ -682,9 +677,10 @@ namespace DX9 { } void FramebufferManagerDX9::CopyDisplayToOutput() { - fbo_unbind(); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + if (useBufferedRendering_) { + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + } currentRenderVfb_ = 0; u32 offsetX = 0; @@ -794,7 +790,7 @@ namespace DX9 { HRESULT hr = fbo_blit_color(vfb->fbo, &srcRect, nullptr, &dstRect, g_Config.iBufFilter == SCALE_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT); if (FAILED(hr)) { ERROR_LOG_REPORT_ONCE(blit_fail, G3D, "fbo_blit_color failed on display: %08x", hr); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // These are in the output display coordinates if (g_Config.iBufFilter == SCALE_LINEAR) { dxstate.texMagFilter.set(D3DTEXF_LINEAR); @@ -814,7 +810,7 @@ namespace DX9 { fbo_bind_as_render_target(extraFBOs_[0]); int fbo_w, fbo_h; fbo_get_dimensions(extraFBOs_[0], &fbo_w, &fbo_h); - dxstate.viewport.set(0, 0, fbo_w, fbo_h); + DXSetViewport(0, 0, fbo_w, fbo_h); DrawActiveTexture(colorTexture, 0, 0, fbo_w, fbo_h, fbo_w, fbo_h, true, 1.0f, 1.0f, postShaderProgram_); fbo_unbind(); @@ -826,18 +822,19 @@ namespace DX9 { return; } colorTexture = fbo_get_color_texture(extraFBOs_[0]); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // These are in the output display coordinates DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height); } else { // Use post-shader, but run shader at output resolution. - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + DXSetViewport(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // These are in the output display coordinates DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height, postShaderProgram_); } */ pD3Ddevice->SetTexture(0, NULL); } + dxstate.viewport.restore(); } void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { @@ -1107,7 +1104,6 @@ namespace DX9 { void FramebufferManagerDX9::EndFrame() { if (resized_) { DestroyAllFBOs(); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); // Actually, auto mode should be more granular... // Round up to a zoom factor for the render size. int zoom = g_Config.iInternalResolution; @@ -1165,7 +1161,9 @@ namespace DX9 { } void FramebufferManagerDX9::DecimateFBOs() { - fbo_unbind(); + if (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE) { + fbo_unbind(); + } currentRenderVfb_ = 0; bool updateVram = !(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE); diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 2a7493458d..777405ef99 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -509,7 +509,6 @@ void DIRECTX9_GPU::InitClearInternal() { dxstate.colorMask.set(true, true, true, true); pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.f, 0); } - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); } void DIRECTX9_GPU::DumpNextFrame() { @@ -521,7 +520,7 @@ void DIRECTX9_GPU::BeginFrame() { } void DIRECTX9_GPU::ReapplyGfxStateInternal() { - DX9::dxstate.Restore(); + dxstate.Restore(); GPUCommon::ReapplyGfxStateInternal(); } diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index d02b8c7982..c1243b2f4f 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -255,6 +255,11 @@ static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY float yoff = -0.5f / gstate_c.curRTRenderHeight; yoff = gstate_c.vpYOffset + (invertedY ? yoff : -yoff); + if (invertedX) + xoff = -xoff; + if (invertedY) + yoff = -yoff; + in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, invertedZ ? -0.5 : 0.5f)); } diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 5c122f06f8..8a4e6ef799 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -284,6 +284,7 @@ void TransformDrawEngineDX9::ApplyBlendState() { ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowShaderBlend, gstate.FrameBufFormat()); ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend); + bool usePreSrc = false; switch (replaceBlend) { @@ -650,152 +651,40 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { } } - float renderWidthFactor, renderHeightFactor; - float renderWidth, renderHeight; - float renderX, renderY; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; - if (useBufferedRendering) { - renderX = 0.0f; - renderY = 0.0f; - renderWidth = framebufferManager_->GetRenderWidth(); - renderHeight = framebufferManager_->GetRenderHeight(); - renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetBufferWidth(); - renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetBufferHeight(); - } else { - float pixelW = PSP_CoreParameter().pixelWidth; - float pixelH = PSP_CoreParameter().pixelHeight; - CenterDisplayOutputRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH, ROTATION_LOCKED_HORIZONTAL); - renderWidthFactor = renderWidth / 480.0f; - renderHeightFactor = renderHeight / 272.0f; - } - renderX += gstate_c.curRTOffsetX * renderWidthFactor; + ViewportAndScissor vpAndScissor; + ConvertViewportAndScissor(useBufferedRendering, + framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(), + framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(), + vpAndScissor); - bool throughmode = gstate.isModeThrough(); - - // Scissor - int scissorX1 = gstate.getScissorX1(); - int scissorY1 = gstate.getScissorY1(); - int scissorX2 = gstate.getScissorX2() + 1; - int scissorY2 = gstate.getScissorY2() + 1; - - // This is a bit of a hack as the render buffer isn't always that size - if (scissorX1 == 0 && scissorY1 == 0 - && scissorX2 >= (int) gstate_c.curRTWidth - && scissorY2 >= (int) gstate_c.curRTHeight) { - dxstate.scissorTest.disable(); - } else { + if (vpAndScissor.scissorEnable) { dxstate.scissorTest.enable(); - dxstate.scissorRect.set( - renderX + scissorX1 * renderWidthFactor, - renderY + scissorY1 * renderHeightFactor, - renderX + scissorX2 * renderWidthFactor, - renderY + scissorY2 * renderHeightFactor); + dxstate.scissorRect.set(vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorX + vpAndScissor.scissorW, vpAndScissor.scissorY + vpAndScissor.scissorH); + } else { + dxstate.scissorTest.disable(); } - int curRTWidth = gstate_c.curRTWidth; - int curRTHeight = gstate_c.curRTHeight; + float depthMin = vpAndScissor.depthRangeMin; + float depthMax = vpAndScissor.depthRangeMax; - float offsetX = gstate.getOffsetX(); - float offsetY = gstate.getOffsetY(); - - if (throughmode) { - dxstate.viewport.set( - renderX, - renderY, - curRTWidth * renderWidthFactor, - curRTHeight * renderHeightFactor, - 0.f, 1.f); - } else { - float vpXScale = gstate.getViewportXScale(); - float vpXCenter = gstate.getViewportXCenter(); - float vpYScale = gstate.getViewportYScale(); - float vpYCenter = gstate.getViewportYCenter(); - - // The viewport transform appears to go like this: - // Xscreen = -offsetX + vpXCenter + vpXScale * Xview - // Yscreen = -offsetY + vpYCenter + vpYScale * Yview - // Zscreen = vpZCenter + vpZScale * Zview - - // This means that to get the analogue glViewport we must: - float vpX0 = vpXCenter - offsetX - fabsf(vpXScale); - float vpY0 = vpYCenter - offsetY - fabsf(vpYScale); - gstate_c.vpWidth = vpXScale * 2.0f; - gstate_c.vpHeight = vpYScale * 2.0f; - - float vpWidth = fabsf(gstate_c.vpWidth); - float vpHeight = fabsf(gstate_c.vpHeight); - - vpX0 *= renderWidthFactor; - vpY0 *= renderHeightFactor; - vpWidth *= renderWidthFactor; - vpHeight *= renderHeightFactor; - - float zScale = gstate.getViewportZScale(); - float zCenter = gstate.getViewportZCenter(); - - // Note - We lose the sign of the zscale here. But we keep it in gstate_c.vpDepth. - // That variable is only check for sign later so the multiplication by 2 isn't really necessary. - - // It's unclear why we need this Z offset to match OpenGL, but this checks out in multiple games. - float depthRangeMin = (zCenter - fabsf(zScale)) * (1.0f / 65535.0f); - float depthRangeMax = (zCenter + fabsf(zScale)) * (1.0f / 65535.0f); - gstate_c.vpDepth = zScale * (2.0f / 65335.0f); - - // D3D doesn't like viewports partially outside the target, so we - // apply the viewport partially in the shader. - float left = renderX + vpX0; - float top = renderY + vpY0; - float right = left + vpWidth; - float bottom = top + vpHeight; - - float wScale = 1.0f; - float xOffset = 0.0f; - float hScale = 1.0f; - float yOffset = 0.0f; - - // If we're within the bounds, we want clipping the viewport way. So leave it be. - if (left < 0.0f || right > renderWidth) { - float overageLeft = std::max(-left, 0.0f); - float overageRight = std::max(right - renderWidth, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageRight - overageLeft; - - left += overageLeft; - right -= overageRight; - - wScale = vpWidth / (right - left); - xOffset = drift / (right - left); + if (!gstate.isModeThrough()) { + // Direct3D can't handle negative depth ranges, so we fix it in the projection matrix. + if (gstate_c.vpDepth != depthMax - depthMin) { + gstate_c.vpDepth = depthMax - depthMin; + vpAndScissor.dirtyProj = true; } - - if (top < 0.0f || bottom > renderHeight) { - float overageTop = std::max(-top, 0.0f); - float overageBottom = std::max(bottom - renderHeight, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageTop - overageBottom; - - top += overageTop; - bottom -= overageBottom; - - hScale = vpHeight / (bottom - top); - yOffset = drift / (bottom - top); + if (depthMin > depthMax) { + std::swap(depthMin, depthMax); } + if (depthMin < 0.0f) depthMin = 0.0f; + if (depthMax > 1.0f) depthMax = 1.0f; + } - depthRangeMin = std::max(0.0f, depthRangeMin); - depthRangeMax = std::min(1.0f, depthRangeMax); - - bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; - bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; - if (scaleChanged || offsetChanged) - { - gstate_c.vpWidthScale = wScale; - gstate_c.vpHeightScale = hScale; - gstate_c.vpXOffset = xOffset; - gstate_c.vpYOffset = yOffset; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - - dxstate.viewport.set(left, top, right - left, bottom - top, depthRangeMin, depthRangeMax); + dxstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH, depthMin, depthMax); + if (vpAndScissor.dirtyProj) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); } } diff --git a/GPU/Directx9/StencilBufferDX9.cpp b/GPU/Directx9/StencilBufferDX9.cpp index 11cfa72069..84ca55bc42 100644 --- a/GPU/Directx9/StencilBufferDX9.cpp +++ b/GPU/Directx9/StencilBufferDX9.cpp @@ -221,7 +221,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer if (dstBuffer->fbo) { fbo_bind_as_render_target(dstBuffer->fbo); } - dxstate.viewport.set(0, 0, w, h); + DXSetViewport(0, 0, w, h); MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight); @@ -279,7 +279,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer } } dxstate.stencilMask.set(0xFF); - + dxstate.viewport.restore(); RebindFramebuffer(); return true; } diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index 408072a542..d25c1143ad 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -1042,15 +1042,7 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame pD3Ddevice->SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); - D3DVIEWPORT9 vp; - vp.MinZ = 0; - vp.MaxZ = 1; - vp.X = 0; - vp.Y = 0; - vp.Width = framebuffer->renderWidth; - vp.Height = framebuffer->renderHeight; - pD3Ddevice->SetViewport(&vp); - + DXSetViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight); HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, (3 + 2) * sizeof(float)); if (FAILED(hr)) { ERROR_LOG_REPORT(G3D, "Depal render failed: %08x", hr); diff --git a/GPU/Directx9/helper/dx_fbo.cpp b/GPU/Directx9/helper/dx_fbo.cpp index 87e17c0695..23692afaaa 100644 --- a/GPU/Directx9/helper/dx_fbo.cpp +++ b/GPU/Directx9/helper/dx_fbo.cpp @@ -115,6 +115,8 @@ void fbo_destroy(FBO *fbo) { void fbo_unbind() { pD3Ddevice->SetRenderTarget(0, deviceRTsurf); pD3Ddevice->SetDepthStencilSurface(deviceDSsurf); + dxstate.scissorRect.restore(); + dxstate.viewport.restore(); } void fbo_resolve(FBO *fbo) { diff --git a/GPU/Directx9/helper/dx_state.cpp b/GPU/Directx9/helper/dx_state.cpp index ccfdc821b4..b5bc4fdf7a 100644 --- a/GPU/Directx9/helper/dx_state.cpp +++ b/GPU/Directx9/helper/dx_state.cpp @@ -37,8 +37,7 @@ void DirectXState::Restore() { colorMask.restore(); count++; - // why not? - // viewport.restore(); count++; + viewport.restore(); count++; alphaTest.restore(); count++; alphaTestFunc.restore(); count++; diff --git a/GPU/Directx9/helper/dx_state.h b/GPU/Directx9/helper/dx_state.h index 156abf0abf..3a74f669eb 100644 --- a/GPU/Directx9/helper/dx_state.h +++ b/GPU/Directx9/helper/dx_state.h @@ -342,6 +342,7 @@ private: class StateVp { D3DVIEWPORT9 viewport; public: + StateVp() { memset(&viewport, 0, sizeof(viewport)); } inline void set(int x, int y, int w, int h, float n = 0.f, float f = 1.f) { D3DVIEWPORT9 newviewport; newviewport.X = x; @@ -350,8 +351,7 @@ private: newviewport.Height = h; newviewport.MinZ = n; newviewport.MaxZ = f; - - if (memcmp(&viewport, &newviewport, sizeof(viewport))) { + if (memcmp(&viewport, &newviewport, sizeof(viewport)) != 0) { viewport = newviewport; restore(); } diff --git a/GPU/Directx9/helper/global.cpp b/GPU/Directx9/helper/global.cpp index d814cb3211..351e90efc7 100644 --- a/GPU/Directx9/helper/global.cpp +++ b/GPU/Directx9/helper/global.cpp @@ -11,6 +11,17 @@ LPDIRECT3DDEVICE9 pD3Ddevice = NULL; LPDIRECT3DDEVICE9EX pD3DdeviceEx = NULL; LPDIRECT3D9 pD3D = NULL; +void DXSetViewport(float x, float y, float w, float h, float minZ, float maxZ) { + D3DVIEWPORT9 vp; + vp.X = (DWORD)x; + vp.Y = (DWORD)y; + vp.Width = (DWORD)w; + vp.Height = (DWORD)h; + vp.MinZ = minZ; + vp.MaxZ = maxZ; + pD3Ddevice->SetViewport(&vp); +} + static const char * vscode = "struct VS_IN {\n" " float4 ObjPos : POSITION;\n" diff --git a/GPU/Directx9/helper/global.h b/GPU/Directx9/helper/global.h index 4eebbca15a..ef6a8eb455 100644 --- a/GPU/Directx9/helper/global.h +++ b/GPU/Directx9/helper/global.h @@ -31,6 +31,8 @@ bool CompileVertexShader(const char *code, LPDIRECT3DVERTEXSHADER9 *pShader, ID3 void DestroyShaders(); void DirectxInit(HWND window); +void DXSetViewport(float x, float y, float w, float h, float minZ = 0.0f, float maxZ = 1.0f); + #define D3DBLEND_UNK D3DBLEND_FORCE_DWORD }; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 49c663a3f7..016dce8d25 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -366,7 +366,12 @@ static void SetMatrix4x3(int uniform, const float *m4x3) { } static inline void ScaleProjMatrix(Matrix4x4 &in) { - const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, 0.0f); + float yOffset = gstate_c.vpYOffset; + if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { + // GL upside down is a pain as usual. + yOffset = -yOffset; + } + const Vec3 trans(gstate_c.vpXOffset, yOffset, 0.0f); const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, 1.0); in.translateAndScale(trans, scale); } diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 2ce84e5212..16ffd24b99 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -586,7 +586,6 @@ void TransformDrawEngine::ApplyBlendState() { } void TransformDrawEngine::ApplyDrawState(int prim) { - // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { @@ -746,189 +745,30 @@ void TransformDrawEngine::ApplyDrawState(int prim) { } } - bool throughmode = gstate.isModeThrough(); + ViewportAndScissor vpAndScissor; + ConvertViewportAndScissor(useBufferedRendering, + framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(), + framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(), + vpAndScissor); - float renderWidthFactor, renderHeightFactor; - float renderWidth, renderHeight; - float renderX = 0.0f, renderY = 0.0f; - float displayOffsetX, displayOffsetY; - if (useBufferedRendering) { - displayOffsetX = 0.0f; - displayOffsetY = 0.0f; - renderWidth = framebufferManager_->GetRenderWidth(); - renderHeight = framebufferManager_->GetRenderHeight(); - renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetBufferWidth(); - renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetBufferHeight(); - } else { - float pixelW = PSP_CoreParameter().pixelWidth; - float pixelH = PSP_CoreParameter().pixelHeight; - CenterDisplayOutputRect(&displayOffsetX, &displayOffsetY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH, ROTATION_LOCKED_HORIZONTAL); - renderWidthFactor = renderWidth / 480.0f; - renderHeightFactor = renderHeight / 272.0f; - } - - renderX += gstate_c.curRTOffsetX * renderWidthFactor; - - // Scissor - int scissorX1 = gstate.getScissorX1(); - int scissorY1 = gstate.getScissorY1(); - int scissorX2 = gstate.getScissorX2() + 1; - int scissorY2 = gstate.getScissorY2() + 1; - - // This is a bit of a hack as the render buffer isn't always that size - // We always scissor on non-buffered so that clears don't spill outside the frame. - if (useBufferedRendering && scissorX1 == 0 && scissorY1 == 0 - && scissorX2 >= (int) gstate_c.curRTWidth - && scissorY2 >= (int) gstate_c.curRTHeight) { - glstate.scissorTest.disable(); - } else { + if (vpAndScissor.scissorEnable) { glstate.scissorTest.enable(); - - // Buffers are now in the GL coordinate system, so no flipping needed. - if (useBufferedRendering) { - glstate.scissorRect.set( - renderX + displayOffsetX + scissorX1 * renderWidthFactor, - renderY + displayOffsetY + scissorY1 * renderHeightFactor, - (scissorX2 - scissorX1) * renderWidthFactor, - (scissorY2 - scissorY1) * renderHeightFactor); - } else { - glstate.scissorRect.set( - renderX + displayOffsetX + scissorX1 * renderWidthFactor, - renderY + displayOffsetY + renderHeight - (scissorY2 * renderHeightFactor), - (scissorX2 - scissorX1) * renderWidthFactor, - (scissorY2 - scissorY1) * renderHeightFactor); + if (!useBufferedRendering) { + vpAndScissor.scissorY = PSP_CoreParameter().pixelHeight - vpAndScissor.scissorH - vpAndScissor.scissorY; } + glstate.scissorRect.set(vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorW, vpAndScissor.scissorH); + } else { + glstate.scissorTest.disable(); } - int curRTWidth = gstate_c.curRTWidth; - int curRTHeight = gstate_c.curRTHeight; + if (!useBufferedRendering) { + vpAndScissor.viewportY = PSP_CoreParameter().pixelHeight - vpAndScissor.viewportH - vpAndScissor.viewportY; + } + glstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH); + glstate.depthRange.set(vpAndScissor.depthRangeMin, vpAndScissor.depthRangeMax); - float offsetX = gstate.getOffsetX(); - float offsetY = gstate.getOffsetY(); - - if (throughmode) { - // No viewport transform here. Let's experiment with using region. - if (useBufferedRendering) { - // No flip needed - glstate.viewport.set( - renderX + displayOffsetX, - renderY + displayOffsetY, - curRTWidth * renderWidthFactor, - curRTHeight * renderHeightFactor); - } else { - renderY += renderHeight - framebufferManager_->GetTargetHeight() * renderHeightFactor; - glstate.viewport.set( - renderX + displayOffsetX, - renderY + displayOffsetY, - curRTWidth * renderWidthFactor, - curRTHeight * renderHeightFactor); - } - glstate.depthRange.set(0.0f, 1.0f); - } else { - // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. - float vpXScale = gstate.getViewportXScale(); - float vpXCenter = gstate.getViewportXCenter(); - float vpYScale = gstate.getViewportYScale(); - float vpYCenter = gstate.getViewportYCenter(); - - // The viewport transform appears to go like this: - // Xscreen = -offsetX + vpXCenter + vpXScale * Xview - // Yscreen = -offsetY + vpYCenter + vpYScale * Yview - // Zscreen = vpZCenter + vpZScale * Zview - - // The viewport is normally centered at 2048,2048 but can also be centered at other locations. - // Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover - // the desired screen area ([0-480)x[0-272)), so 1808,1912. - - // This means that to get the analogue glViewport we must: - float vpX0 = vpXCenter - offsetX - fabsf(vpXScale); - float vpY0 = vpYCenter - offsetY - fabsf(vpYScale); // Need to account for sign of Y - gstate_c.vpWidth = vpXScale * 2.0f; - gstate_c.vpHeight = vpYScale * 2.0f; - - float vpWidth = fabsf(gstate_c.vpWidth); - float vpHeight = fabsf(gstate_c.vpHeight); - - // This multiplication should probably be done after viewport clipping. Would let us very slightly simplify the clipping logic? - vpX0 *= renderWidthFactor; - vpY0 *= renderHeightFactor; - vpWidth *= renderWidthFactor; - vpHeight *= renderHeightFactor; - - // We used to apply the viewport here via glstate, but there are limits which vary by driver. - // This may mean some games won't work, or at least won't work at higher render resolutions. - // So we apply it in the shader instead. - float left = renderX + vpX0; - float top = renderY + vpY0; - float right = left + vpWidth; - float bottom = top + vpHeight; - - float wScale = 1.0f; - float xOffset = 0.0f; - float hScale = 1.0f; - float yOffset = 0.0f; - - // If we're within the bounds, we want clipping the viewport way. So leave it be. - if (left < 0.0f || right > renderWidth) { - float overageLeft = std::max(-left, 0.0f); - float overageRight = std::max(right - renderWidth, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageRight - overageLeft; - - left += overageLeft; - right -= overageRight; - - wScale = vpWidth / (right - left); - xOffset = drift / (right - left); - } - - if (top < 0.0f || bottom > renderHeight) { - float overageTop = std::max(-top, 0.0f); - float overageBottom = std::max(bottom - renderHeight, 0.0f); - // Our center drifted by the difference in overages. - float drift = overageBottom - overageTop; - - top += overageTop; - bottom -= overageBottom; - - hScale = vpHeight / (bottom - top); - yOffset = drift / (bottom - top); - } - - bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; - bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; - if (scaleChanged || offsetChanged) { - gstate_c.vpWidthScale = wScale; - gstate_c.vpHeightScale = hScale; - gstate_c.vpXOffset = xOffset; - gstate_c.vpYOffset = yOffset; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - - if (useBufferedRendering) { - glstate.viewport.set(left + displayOffsetX, displayOffsetY + top, right - left, bottom - top); - } else { - glstate.viewport.set(left + displayOffsetX, displayOffsetY + (renderHeight - bottom), right - left, bottom - top); - } - - float zScale = gstate.getViewportZScale(); - float zCenter = gstate.getViewportZCenter(); - float depthRangeMin = zCenter - zScale; - float depthRangeMax = zCenter + zScale; - glstate.depthRange.set(depthRangeMin * (1.0f / 65535.0f), depthRangeMax * (1.0f / 65535.0f)); - -#ifndef MOBILE_DEVICE - float minz = gstate.getDepthRangeMin(); - float maxz = gstate.getDepthRangeMax(); - if ((minz > depthRangeMin && minz > depthRangeMax) || (maxz < depthRangeMin && maxz < depthRangeMax)) { - WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range in test - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); - } else if ((gstate.clipEnable & 1) == 0) { - // TODO: Need to test whether clipEnable should even affect depth or not. - if ((minz < depthRangeMin && minz < depthRangeMax) || (maxz > depthRangeMin && maxz > depthRangeMax)) { - WARN_LOG_REPORT_ONCE(znoclip, G3D, "Unsupported depth range in test without clipping - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); - } - } -#endif + if (vpAndScissor.dirtyProj) { + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); } }