From 19a1fa84300b31dd4170eda915246b20ba094b4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 22 Oct 2019 21:38:14 +0200 Subject: [PATCH 01/18] Add a compat.ini flag for software rendering, use it to force on in Darkstalkers --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + Core/System.cpp | 5 +++++ UI/EmuScreen.cpp | 3 --- assets/compat.ini | 5 +++++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index ff2d6d23fa..e4b35d2ee6 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -67,6 +67,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "JitInvalidationHack", &flags_.JitInvalidationHack); CheckSetting(iniFile, gameID, "HideISOFiles", &flags_.HideISOFiles); CheckSetting(iniFile, gameID, "MoreAccurateVMMUL", &flags_.MoreAccurateVMMUL); + CheckSetting(iniFile, gameID, "ForceSoftwareRenderer", &flags_.ForceSoftwareRenderer); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 0938b4c731..a3f35ec3fb 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -67,6 +67,7 @@ struct CompatFlags { bool JitInvalidationHack; bool HideISOFiles; bool MoreAccurateVMMUL; + bool ForceSoftwareRenderer; }; class IniFile; diff --git a/Core/System.cpp b/Core/System.cpp index 266eab30e3..f36cbcec83 100644 --- a/Core/System.cpp +++ b/Core/System.cpp @@ -349,6 +349,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) { CPU_Init(); + // Compat flags get loaded in CPU_Init (which is a bit of a misnomer) so we check for SW renderer here. + if (g_Config.bSoftwareRendering || PSP_CoreParameter().compat.flags().ForceSoftwareRenderer) { + coreParameter.gpuCore = GPUCORE_SOFTWARE; + } + *error_string = coreParameter.errorString; bool success = coreParameter.fileToStart != ""; if (!success) { diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 4a7a8a466c..6c062d6a52 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -232,9 +232,6 @@ void EmuScreen::bootGame(const std::string &filename) { break; #endif } - if (g_Config.bSoftwareRendering) { - coreParam.gpuCore = GPUCORE_SOFTWARE; - } // Preserve the existing graphics context. coreParam.graphicsContext = PSP_CoreParameter().graphicsContext; diff --git a/assets/compat.ini b/assets/compat.ini index 662e3b0e25..4d41fa0665 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -683,3 +683,8 @@ UCET00844 = true UCUS98705 = true UCED00971 = true UCUS98713 = true + +[ForceSoftwareRenderer] +# Darkstalkers +ULES00016 = true +ULUS10005 = true From 3a0804a7ddf3969e3dd9938481116d7a4daeb8a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 22 Oct 2019 21:54:43 +0200 Subject: [PATCH 02/18] Start slowly migrating from macros --- GPU/Software/Clipper.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index eb181ce613..ca04f7f97b 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -51,26 +51,28 @@ static inline int CalcClipMask(const ClipCoords& v) #define AddInterpolatedVertex(t, out, in, numVertices) \ { \ - Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \ - numVertices++; \ + Vertices[numVertices++]->Lerp(t, *Vertices[out], *Vertices[in]); \ } -#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) +inline bool DIFFERENT_SIGNS(float x, float y) { + return ((x <= 0 && y > 0) || (x > 0 && y <= 0)); +} -#define CLIP_DOTPROD(I, A, B, C, D) \ - (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D) +inline float CLIP_DOTPROD(const VertexData &vert, float A, float B, float C, float D) { + return (vert.clippos.x * A + vert.clippos.y * B + vert.clippos.z * C + vert.clippos.w * D); +} #define POLY_CLIP( PLANE_BIT, A, B, C, D ) \ { \ if (mask & PLANE_BIT) { \ int idxPrev = inlist[0]; \ - float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \ + float dpPrev = CLIP_DOTPROD(*Vertices[idxPrev], A, B, C, D );\ int outcount = 0; \ \ inlist[n] = inlist[0]; \ for (int j = 1; j <= n; j++) { \ int idx = inlist[j]; \ - float dp = CLIP_DOTPROD(idx, A, B, C, D ); \ + float dp = CLIP_DOTPROD(*Vertices[idx], A, B, C, D ); \ if (dpPrev >= 0) { \ outlist[outcount++] = idxPrev; \ } \ @@ -104,9 +106,9 @@ static inline int CalcClipMask(const ClipCoords& v) #define CLIP_LINE(PLANE_BIT, A, B, C, D) \ { \ - if (mask & PLANE_BIT) { \ - float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ - float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \ + if (mask & PLANE_BIT) { \ + float dp0 = CLIP_DOTPROD(*Vertices[0], A, B, C, D ); \ + float dp1 = CLIP_DOTPROD(*Vertices[1], A, B, C, D ); \ int i = 0; \ \ if (mask0 & PLANE_BIT) { \ @@ -116,7 +118,7 @@ static inline int CalcClipMask(const ClipCoords& v) AddInterpolatedVertex(t, 1, 0, i); \ } \ } \ - dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ + dp0 = CLIP_DOTPROD(*Vertices[0], A, B, C, D ); \ \ if (mask1 & PLANE_BIT) { \ if (dp1 < 0) { \ From 58568632e8e19082359686a31660066da56e967d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 22 Oct 2019 23:14:27 +0200 Subject: [PATCH 03/18] Software renderer: Use hardware color conversion on Vulkan in 5551 16-bit mode --- GPU/Software/SoftGpu.cpp | 15 ++++++++++----- ext/native/thin3d/thin3d.h | 7 +++++++ ext/native/thin3d/thin3d_vulkan.cpp | 15 ++++++++++++++- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 76f4a8d7b9..00a240e9bd 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -163,6 +163,9 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { // For accuracy, try to handle 0 stride - sometimes used. if (displayStride_ == 0) { srcheight = 1; + u1 = 1.0f; + } else { + u1 = (float)srcwidth / displayStride_; } Draw::TextureDesc desc{}; @@ -181,11 +184,13 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { desc.height = srcheight; desc.initData.push_back(data); desc.format = Draw::DataFormat::R8G8B8A8_UNORM; - if (displayStride_ != 0) { - u1 = (float)srcwidth / displayStride_; - } else { - u1 = 1.0f; - } + } else if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN && displayFormat_ == GE_FORMAT_5551) { + u8 *data = Memory::GetPointer(displayFramebuf_); + desc.swizzle = Draw::TextureSwizzle::BGRA; + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + desc.width = displayStride_ == 0 ? srcwidth : displayStride_; + desc.height = srcheight; + desc.initData.push_back(data); } else { // TODO: This should probably be converted in a shader instead.. fbTexBuffer.resize(srcwidth * srcheight); diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 7488e5ef47..91b1847f4b 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -515,9 +515,16 @@ struct DeviceCaps { std::string deviceName; // The device name to use when creating the thin3d context, to get the same one. }; +// Some predefined swizzle +enum class TextureSwizzle { + NO_SWIZZLE = 0, + BGRA = 1, +}; + struct TextureDesc { TextureType type; DataFormat format; + TextureSwizzle swizzle; int width; int height; int depth; diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 3de9e59db4..72c88feed3 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -699,7 +699,20 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur // Gonna have to generate some, which requires TRANSFER_SRC usageBits |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } - if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) { + + VkComponentMapping mapping{}; // Defaults to no swizzle + switch (desc.swizzle) { + case TextureSwizzle::NO_SWIZZLE: + break; + case TextureSwizzle::BGRA: + mapping.r = VK_COMPONENT_SWIZZLE_B; + mapping.g = VK_COMPONENT_SWIZZLE_G; + mapping.b = VK_COMPONENT_SWIZZLE_R; + mapping.a = VK_COMPONENT_SWIZZLE_A; + break; + } + + if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits, &mapping)) { ELOG("Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_); return false; } From 510229b68b1c00110beb747f0f10f431a35e2286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 22 Oct 2019 23:46:26 +0200 Subject: [PATCH 04/18] SoftGPU: Detect through-mode rectangles from triangle strips --- GPU/Math3D.h | 4 ++++ GPU/Software/Clipper.cpp | 2 +- GPU/Software/TransformUnit.cpp | 38 ++++++++++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/GPU/Math3D.h b/GPU/Math3D.h index 292c63be05..ada1a6931e 100644 --- a/GPU/Math3D.h +++ b/GPU/Math3D.h @@ -625,6 +625,10 @@ public: *this = *this / f; } + bool operator ==(const Vec4 &other) const { + return x == other.x && y == other.y && z == other.z && w == other.w; + } + T Length2() const { return x*x + y*y + z*z + w*w; diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index ca04f7f97b..746896c431 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -198,7 +198,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) // Color and depth values of second vertex are used for the whole rectangle buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; - buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; // is color1 ever used in through mode? buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 3b0603fd79..2848ce600f 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -319,8 +319,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy VertexReader vreader(buf, vtxfmt, vertex_type); - const int max_vtcs_per_prim = 3; - static VertexData data[max_vtcs_per_prim]; + static VertexData data[4]; // Normally max verts per prim is 3, but we temporarily need 4 to detect rectangles from strips. // This is the index of the next vert in data (or higher, may need modulus.) static int data_index = 0; @@ -439,6 +438,41 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy // Don't draw a triangle when loading the first two vertices. int skip_count = data_index >= 2 ? 0 : 2 - data_index; + // If index count == 4, check if we can convert to a rectangle. + // This is for Darkstalkers (and should speed up many 2D games). + if (vertex_count == 4 && gstate.isModeThrough()) { + for (int vtx = 0; vtx < 4; ++vtx) { + if (indices) { + vreader.Goto(ConvertIndex(vtx) - index_lower_bound); + } + else { + vreader.Goto(vtx); + } + data[vtx] = ReadVertex(vreader); + } + + // OK, now let's look at data to detect rectangles. There are a few possibilities + // but we focus on Darkstalkers for now. + if (data[0].screenpos.x == data[1].screenpos.x && + data[0].screenpos.y == data[2].screenpos.y && + data[2].screenpos.x == data[3].screenpos.x && + data[1].screenpos.y == data[3].screenpos.y && + data[1].screenpos.y > data[0].screenpos.y && // Avoid rotation handling + data[2].screenpos.x > data[0].screenpos.x && + data[0].texturecoords.x == data[1].texturecoords.x && + data[0].texturecoords.y == data[2].texturecoords.y && + data[2].texturecoords.x == data[3].texturecoords.x && + data[1].texturecoords.y == data[3].texturecoords.y && + data[1].texturecoords.y > data[0].texturecoords.y && + data[2].texturecoords.x > data[0].texturecoords.x && + data[0].color0 == data[1].color0 && + data[1].color0 == data[2].color0 && + data[2].color0 == data[3].color0) { + // It's a rectangle! + Clipper::ProcessRect(data[0], data[3]); + } + } + for (int vtx = 0; vtx < vertex_count; ++vtx) { if (indices) { vreader.Goto(ConvertIndex(vtx) - index_lower_bound); From c7f6724f7efc0b8477245b3199bbb1500d93000f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 23 Oct 2019 00:18:30 +0200 Subject: [PATCH 05/18] Detect sprite drawing (1:1 texture mapping), run a simpler function without the triangle state tracking. This will allow further simplification and specialization. --- GPU/GPU.vcxproj | 1 + GPU/Software/Clipper.cpp | 33 +++++++++++++++++++++++ GPU/Software/Rasterizer.cpp | 48 ++++++++++++++++++++++++++++++++++ GPU/Software/Rasterizer.h | 1 + GPU/Software/TransformUnit.cpp | 21 +++++++++++++++ 5 files changed, 104 insertions(+) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 53903c75c1..44da128934 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -197,6 +197,7 @@ MultiThreadedDebug Common/DbgNew.h ProgramDatabase + false true diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 746896c431..b3fe40949e 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -184,6 +184,39 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) ProcessTriangle(*topleft, *bottomleft, *bottomright, buf[3]); } else { // through mode handling + + // Check for simple case: No depth, alpha != 0 testing only, no blend, texture mapping 1:1 etc. + // Also check for scissor rectangle etc. + // That is, state commonly used in PSX games and ports like Darkstalker. + // In that case we can call DrawPSXSprite. + int xdiff = v1.screenpos.x - v0.screenpos.x; + int ydiff = v1.screenpos.y - v0.screenpos.y; + int udiff = (v1.texturecoords.x - v0.texturecoords.x) * 16.0f; + int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * 16.0f; + bool coord_check = + (xdiff == udiff /* || xdiff == -udiff*/) && + (ydiff == vdiff /* || ydiff == -vdiff*/); + // TODO: The U/V mirror support is off by one somehow. Predecrement? + + /* + bool state_check = + !gstate.isModeClear() && + !gstate.isFogEnabled() && + gstate.isTextureMapEnabled() && + !gstate.isDepthTestEnabled() && + !gstate.isStencilTestEnabled(); + bool alpha_check = + gstate.getAlphaTestFunction() == GEComparison::GE_COMP_GREATER && + gstate.getAlphaTestMask() == 0xFF && + gstate.getAlphaTestRef() == 0; + */ + bool state_check = !gstate.isModeClear(); + bool alpha_check = true; + if ((coord_check || !gstate.isTextureMapEnabled()) && state_check && alpha_check) { + Rasterizer::DrawPSXSprite(v0, v1); + return; + } + VertexData buf[4]; buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); buf[0].texturecoords = v0.texturecoords; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 5db77b9407..88841eeced 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1287,6 +1287,54 @@ void DrawTriangleSlice( } } +void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { + u8 *texptr = nullptr; + + GETextureFormat texfmt = gstate.getTextureFormat(); + u32 texaddr = gstate.getTextureAddress(0); + int texbufw = GetTextureBufw(0, texaddr, texfmt); + if (Memory::IsValidAddress(texaddr)) + texptr = Memory::GetPointerUnchecked(texaddr); + + ScreenCoords pprime(v0.screenpos.x, v0.screenpos.y, 0); + Sampler::Funcs sampler = Sampler::GetFuncs(); + + DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos); + DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos); + + int z = pos0.z; + float fog = 1.0f; + + if (gstate.isTextureMapEnabled()) { + // 1:1 (but with mirror support) texture mapping! + int s = v0.texturecoords.x; + int t = v0.texturecoords.y; + int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1; + int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1; + for (int y = pos0.y; y < pos1.y; y++) { + s = v0.texturecoords.x; + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + Vec4 tex_color = Vec4::FromRGBA(sampler.nearest(s, t, texptr, texbufw, 0)); + prim_color = GetTextureFunctionOutput(prim_color, tex_color); + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, fog, prim_color); + s += ds; + } + t += dt; + } + } + else { + for (int y = pos0.y; y < pos1.y; y++) { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, fog, prim_color); + } + } + } +} + // Draws triangle, vertices specified in counter-clockwise direction void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 53d44e8af0..076a0421a5 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -27,6 +27,7 @@ namespace Rasterizer { void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2); void DrawPoint(const VertexData &v0); void DrawLine(const VertexData &v0, const VertexData &v1); +void DrawPSXSprite(const VertexData &v0, const VertexData &v1); void ClearRectangle(const VertexData &v0, const VertexData &v1); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 2848ce600f..82a8497684 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -470,6 +470,27 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy data[2].color0 == data[3].color0) { // It's a rectangle! Clipper::ProcessRect(data[0], data[3]); + break; + } + // There's the other vertex order too... + if (data[0].screenpos.x == data[2].screenpos.x && + data[0].screenpos.y == data[1].screenpos.y && + data[1].screenpos.x == data[3].screenpos.x && + data[2].screenpos.y == data[3].screenpos.y && + data[2].screenpos.y > data[0].screenpos.y && // Avoid rotation handling + data[1].screenpos.x > data[0].screenpos.x && + data[0].texturecoords.x == data[2].texturecoords.x && + data[0].texturecoords.y == data[1].texturecoords.y && + data[1].texturecoords.x == data[3].texturecoords.x && + data[2].texturecoords.y == data[3].texturecoords.y && + data[2].texturecoords.y > data[0].texturecoords.y && + data[1].texturecoords.x > data[0].texturecoords.x && + data[0].color0 == data[1].color0 && + data[1].color0 == data[2].color0 && + data[2].color0 == data[3].color0) { + // It's a rectangle! + Clipper::ProcessRect(data[0], data[3]); + break; } } From 2dd7a9aa12612bc125aa924c321896c378630ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 23 Oct 2019 23:21:47 +0200 Subject: [PATCH 06/18] More darkstalkers work --- GPU/Software/Clipper.cpp | 42 +++++++++++----------------- GPU/Software/Rasterizer.cpp | 55 +++++++++++++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 32 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index b3fe40949e..5f4f76d3a3 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -49,16 +49,11 @@ static inline int CalcClipMask(const ClipCoords& v) return mask; } -#define AddInterpolatedVertex(t, out, in, numVertices) \ -{ \ - Vertices[numVertices++]->Lerp(t, *Vertices[out], *Vertices[in]); \ -} - -inline bool DIFFERENT_SIGNS(float x, float y) { +inline bool different_signs(float x, float y) { return ((x <= 0 && y > 0) || (x > 0 && y <= 0)); } -inline float CLIP_DOTPROD(const VertexData &vert, float A, float B, float C, float D) { +inline float clip_dotprod(const VertexData &vert, float A, float B, float C, float D) { return (vert.clippos.x * A + vert.clippos.y * B + vert.clippos.z * C + vert.clippos.w * D); } @@ -66,24 +61,24 @@ inline float CLIP_DOTPROD(const VertexData &vert, float A, float B, float C, flo { \ if (mask & PLANE_BIT) { \ int idxPrev = inlist[0]; \ - float dpPrev = CLIP_DOTPROD(*Vertices[idxPrev], A, B, C, D );\ + float dpPrev = clip_dotprod(*Vertices[idxPrev], A, B, C, D );\ int outcount = 0; \ \ inlist[n] = inlist[0]; \ for (int j = 1; j <= n; j++) { \ int idx = inlist[j]; \ - float dp = CLIP_DOTPROD(*Vertices[idx], A, B, C, D ); \ + float dp = clip_dotprod(*Vertices[idx], A, B, C, D ); \ if (dpPrev >= 0) { \ outlist[outcount++] = idxPrev; \ } \ \ - if (DIFFERENT_SIGNS(dp, dpPrev)) { \ + if (different_signs(dp, dpPrev)) { \ if (dp < 0) { \ float t = dp / (dp - dpPrev); \ - AddInterpolatedVertex(t, idx, idxPrev, numVertices); \ + Vertices[numVertices++]->Lerp(t, *Vertices[idx], *Vertices[idxPrev]); \ } else { \ float t = dpPrev / (dpPrev - dp); \ - AddInterpolatedVertex(t, idxPrev, idx, numVertices); \ + Vertices[numVertices++]->Lerp(t, *Vertices[idxPrev], *Vertices[idx]); \ } \ outlist[outcount++] = numVertices - 1; \ } \ @@ -107,24 +102,22 @@ inline float CLIP_DOTPROD(const VertexData &vert, float A, float B, float C, flo #define CLIP_LINE(PLANE_BIT, A, B, C, D) \ { \ if (mask & PLANE_BIT) { \ - float dp0 = CLIP_DOTPROD(*Vertices[0], A, B, C, D ); \ - float dp1 = CLIP_DOTPROD(*Vertices[1], A, B, C, D ); \ - int i = 0; \ + float dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \ + float dp1 = clip_dotprod(*Vertices[1], A, B, C, D ); \ + int numVertices = 0; \ \ if (mask0 & PLANE_BIT) { \ if (dp0 < 0) { \ float t = dp1 / (dp1 - dp0); \ - i = 0; \ - AddInterpolatedVertex(t, 1, 0, i); \ + Vertices[0]->Lerp(t, *Vertices[1], *Vertices[0]); \ } \ } \ - dp0 = CLIP_DOTPROD(*Vertices[0], A, B, C, D ); \ + dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \ \ if (mask1 & PLANE_BIT) { \ if (dp1 < 0) { \ float t = dp1 / (dp1- dp0); \ - i = 1; \ - AddInterpolatedVertex(t, 1, 0, i); \ + Vertices[1]->Lerp(t, *Vertices[1], *Vertices[0]); \ } \ } \ } \ @@ -185,17 +178,14 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) } else { // through mode handling - // Check for simple case: No depth, alpha != 0 testing only, no blend, texture mapping 1:1 etc. - // Also check for scissor rectangle etc. - // That is, state commonly used in PSX games and ports like Darkstalker. - // In that case we can call DrawPSXSprite. + // Check for 1:1 texture mapping. In that case we can call DrawSprite. int xdiff = v1.screenpos.x - v0.screenpos.x; int ydiff = v1.screenpos.y - v0.screenpos.y; int udiff = (v1.texturecoords.x - v0.texturecoords.x) * 16.0f; int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * 16.0f; bool coord_check = - (xdiff == udiff /* || xdiff == -udiff*/) && - (ydiff == vdiff /* || ydiff == -vdiff*/); + (xdiff == udiff || xdiff == -udiff) && + (ydiff == vdiff || ydiff == -vdiff); // TODO: The U/V mirror support is off by one somehow. Predecrement? /* diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 88841eeced..cd90f89fac 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1287,8 +1287,20 @@ void DrawTriangleSlice( } } +// Slow but can handle all input. +void SafeScanline(int s, int ds, int t, int x1, int x2, int y, int z, const u8 *texptr, int texbufw, Sampler::Funcs &sampler, Vec4 v0_color) { + for (int x = x1; x < x2; x++) { + Vec4 prim_color = v0_color; + Vec4 tex_color = Vec4::FromRGBA(sampler.nearest(s, t, texptr, texbufw, 0)); + prim_color = GetTextureFunctionOutput(prim_color, tex_color); + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, 1.0f, prim_color); + s += ds; + } +} + void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { - u8 *texptr = nullptr; + const u8 *texptr = nullptr; GETextureFormat texfmt = gstate.getTextureFormat(); u32 texaddr = gstate.getTextureAddress(0); @@ -1302,17 +1314,44 @@ void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos); DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos); + DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0); + DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0); + int z = pos0.z; float fog = 1.0f; if (gstate.isTextureMapEnabled()) { // 1:1 (but with mirror support) texture mapping! - int s = v0.texturecoords.x; - int t = v0.texturecoords.y; + int s_start = v0.texturecoords.x; + int t_start = v0.texturecoords.y; int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1; int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1; + + if (ds < 0) { + s_start += ds; + } + if (dt < 0) { + t_start += dt; + } + + // First clip the right and bottom sides, since we don't need to adjust the deltas. + if (pos1.x > scissorBR.x) pos1.x = scissorBR.x; + if (pos1.y > scissorBR.y) pos1.y = scissorBR.y; + // Now clip the other sides. + if (pos0.x < scissorTL.x) { + s_start += (scissorTL.x - pos0.x) * ds; + pos0.x = scissorTL.x; + } + if (pos0.y < scissorTL.y) { + t_start += (scissorTL.y - pos0.y) * dt; + pos0.y = scissorTL.y; + } + + int t = t_start; for (int y = pos0.y; y < pos1.y; y++) { - s = v0.texturecoords.x; + int s = s_start; + SafeScanline(s, ds, t, pos0.x, pos1.x, y, z, texptr, texbufw, sampler, v0.color0); + /* for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v0.color0; Vec4 tex_color = Vec4::FromRGBA(sampler.nearest(s, t, texptr, texbufw, 0)); @@ -1321,10 +1360,14 @@ void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { DrawSinglePixel(pos, (u16)z, fog, prim_color); s += ds; } + */ t += dt; } - } - else { + } else { + if (pos1.x > scissorBR.x) pos1.x = scissorBR.x; + if (pos1.y > scissorBR.y) pos1.y = scissorBR.y; + if (pos0.x < scissorTL.x) pos0.x = scissorTL.x; + if (pos0.y < scissorTL.y) pos0.y = scissorTL.y; for (int y = pos0.y; y < pos1.y; y++) { for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v0.color0; From 9099441973e8c55984eba1394d1d239d96f2e81a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 24 Oct 2019 00:10:39 +0200 Subject: [PATCH 07/18] Darkstalkers: Gross hack to avoid the game's own stretch, and present the raw buffer instead for a sharper image. --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + GPU/Software/Clipper.cpp | 12 ++++++++++++ GPU/Software/SoftGpu.cpp | 22 ++++++++++++++++++---- assets/compat.ini | 5 +++++ 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index e4b35d2ee6..b006447072 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -68,6 +68,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "HideISOFiles", &flags_.HideISOFiles); CheckSetting(iniFile, gameID, "MoreAccurateVMMUL", &flags_.MoreAccurateVMMUL); CheckSetting(iniFile, gameID, "ForceSoftwareRenderer", &flags_.ForceSoftwareRenderer); + CheckSetting(iniFile, gameID, "DarkStalkersPresentHack", &flags_.DarkStalkersPresentHack); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index a3f35ec3fb..0baf9db5fa 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -68,6 +68,7 @@ struct CompatFlags { bool HideISOFiles; bool MoreAccurateVMMUL; bool ForceSoftwareRenderer; + bool DarkStalkersPresentHack; }; class IniFile; diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 5f4f76d3a3..be9acb8c19 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -17,6 +17,8 @@ #include +#include "Core/System.h" + #include "GPU/GPUState.h" #include "GPU/Software/Clipper.h" @@ -24,6 +26,9 @@ #include "profiler/profiler.h" + +extern bool g_DarkStalkerStretch; + namespace Clipper { enum { @@ -207,6 +212,13 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) return; } + // Eliminate the stretch blit in DarkStalkers. + // We compensate for that when blitting the framebuffer in SoftGpu.cpp. + if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) { + g_DarkStalkerStretch = true; + return; + } + VertexData buf[4]; buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); buf[0].texturecoords = v0.texturecoords; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 00a240e9bd..6b13a34f2e 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -148,12 +148,16 @@ void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for GPURecord::NotifyDisplay(framebuf, stride, format); } +bool g_DarkStalkerStretch; + // Copies RGBA8 data from RAM to the currently bound render target. void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { if (!draw_) return; float u0 = 0.0f; float u1; + float v0 = 1.0f; + float v1 = 0.0f; if (fbTex) { fbTex->Release(); @@ -175,7 +179,19 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { desc.mipLevels = 1; desc.tag = "SoftGPU"; bool hasImage = true; - if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { + if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch) { + u8 *data = Memory::GetPointer(0x04088000); + desc.swizzle = Draw::TextureSwizzle::BGRA; + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + desc.width = displayStride_ == 0 ? srcwidth : displayStride_; + desc.height = srcheight; + desc.initData.push_back(data); + u0 = 64.0f / 512.0f; + u1 = 448.0f / 512.0f; + v1 = 16.0f / 272.0f; + v0 = 240.0f / 272.0f; + g_DarkStalkerStretch = false; + } else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { hasImage = false; u1 = 1.0f; } else if (displayFormat_ == GE_FORMAT_8888) { @@ -252,12 +268,10 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { x2 -= 1.0f; y2 -= 1.0f; - float v0 = 1.0f; - float v1 = 0.0f; - if (GetGPUBackend() == GPUBackend::VULKAN) { std::swap(v0, v1); } + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); Draw::Viewport viewport = { 0.0f, 0.0f, dstwidth, dstheight, 0.0f, 1.0f }; draw_->SetViewports(1, &viewport); diff --git a/assets/compat.ini b/assets/compat.ini index 4d41fa0665..c67f0b8360 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -688,3 +688,8 @@ UCUS98713 = true # Darkstalkers ULES00016 = true ULUS10005 = true + +[DarkStalkersPresentHack] +# Darkstalkers +ULES00016 = true +ULUS10005 = true From 796539ad7fc7b6187528e058fd1fabf8b30dd56f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 24 Oct 2019 00:51:55 +0200 Subject: [PATCH 08/18] DarkStalkers: Fix display in the D3D backends. Still broken in OpenGL. --- GPU/Software/SoftGpu.cpp | 18 +++++++++--- GPU/Software/SoftGpu.h | 1 + ext/native/thin3d/thin3d.cpp | 47 ++++++++++++++++++++++++++++++- ext/native/thin3d/thin3d.h | 1 + ext/native/thin3d/thin3d_d3d9.cpp | 9 ++++++ 5 files changed, 71 insertions(+), 5 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 6b13a34f2e..c468b45b65 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -73,8 +73,6 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw) }, }; - ShaderModule *vshader = draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D); - vdata = draw_->CreateBuffer(sizeof(Vertex) * 4, BufferUsageFlag::DYNAMIC | BufferUsageFlag::VERTEXDATA); idata = draw_->CreateBuffer(sizeof(int) * 6, BufferUsageFlag::DYNAMIC | BufferUsageFlag::INDEXDATA); @@ -92,6 +90,14 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw) inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc }; texColor = draw_->CreateGraphicsPipeline(pipelineDesc); + + PipelineDesc pipelineDescRBSwizzle{ + Primitive::TRIANGLE_LIST, + { draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D), draw_->GetFshaderPreset(FS_TEXTURE_COLOR_2D_RB_SWIZZLE) }, + inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc + }; + texColorRBSwizzle = draw_->CreateGraphicsPipeline(pipelineDescRBSwizzle); + inputLayout->Release(); depth->Release(); blendstateOff->Release(); @@ -122,6 +128,8 @@ void SoftGPU::DeviceRestore() { SoftGPU::~SoftGPU() { texColor->Release(); texColor = nullptr; + texColorRBSwizzle->Release(); + texColorRBSwizzle = nullptr; if (fbTex) { fbTex->Release(); @@ -179,9 +187,10 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { desc.mipLevels = 1; desc.tag = "SoftGPU"; bool hasImage = true; + + Draw::Pipeline *pipeline = texColor; if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch) { u8 *data = Memory::GetPointer(0x04088000); - desc.swizzle = Draw::TextureSwizzle::BGRA; desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; desc.width = displayStride_ == 0 ? srcwidth : displayStride_; desc.height = srcheight; @@ -190,6 +199,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { u1 = 448.0f / 512.0f; v1 = 16.0f / 272.0f; v0 = 240.0f / 272.0f; + pipeline = texColorRBSwizzle; g_DarkStalkerStretch = false; } else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { hasImage = false; @@ -307,7 +317,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { Draw::VsTexColUB ub{}; memcpy(ub.WorldViewProj, g_display_rot_matrix.m, sizeof(float) * 16); - draw_->BindPipeline(texColor); + draw_->BindPipeline(pipeline); draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub)); draw_->BindVertexBuffers(0, 1, &vdata, nullptr); draw_->BindIndexBuffer(idata, 0); diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index f92344742e..c7d4f9365e 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -108,6 +108,7 @@ private: Draw::Texture *fbTex; Draw::Pipeline *texColor; + Draw::Pipeline *texColorRBSwizzle; std::vector fbTexBuffer; Draw::SamplerState *samplerNearest = nullptr; diff --git a/ext/native/thin3d/thin3d.cpp b/ext/native/thin3d/thin3d.cpp index d142638e07..340157708d 100644 --- a/ext/native/thin3d/thin3d.cpp +++ b/ext/native/thin3d/thin3d.cpp @@ -146,6 +146,50 @@ static const std::vector fsTexCol = { } }; +static const std::vector fsTexColRBSwizzle = { + {ShaderLanguage::GLSL_ES_200, + "#ifdef GL_ES\n" + "precision lowp float;\n" + "#endif\n" + "#if __VERSION__ >= 130\n" + "#define varying in\n" + "#define texture2D texture\n" + "#define gl_FragColor fragColor0\n" + "out vec4 fragColor0;\n" + "#endif\n" + "varying vec4 oColor0;\n" + "varying vec2 oTexCoord0;\n" + "uniform sampler2D Sampler0;\n" + "void main() { gl_FragColor = texture2D(Sampler0, oTexCoord0).zyxw * oColor0; }\n" + }, + {ShaderLanguage::HLSL_D3D9, + "struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n" + "sampler2D Sampler0 : register(s0);\n" + "float4 main(PS_INPUT input) : COLOR0 {\n" + " return input.color * tex2D(Sampler0, input.uv).zyxw;\n" + "}\n" + }, + {ShaderLanguage::HLSL_D3D11, + "struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n" + "SamplerState samp : register(s0);\n" + "Texture2D tex : register(t0);\n" + "float4 main(PS_INPUT input) : SV_Target {\n" + " float4 col = input.color * tex.Sample(samp, input.uv).bgra;\n" + " return col;\n" + "}\n" + }, + {ShaderLanguage::GLSL_VULKAN, + "#version 140\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_ARB_shading_language_420pack : enable\n" + "layout(location = 0) in vec4 oColor0;\n" + "layout(location = 1) in vec2 oTexCoord0;\n" + "layout(location = 0) out vec4 fragColor0\n;" + "layout(set = 0, binding = 1) uniform sampler2D Sampler0;\n" + "void main() { fragColor0 = texture(Sampler0, oTexCoord0).bgra * oColor0; }\n" + } +}; + static const std::vector fsCol = { { ShaderLanguage::GLSL_ES_200, "#ifdef GL_ES\n" @@ -330,8 +374,9 @@ bool DrawContext::CreatePresets() { fsPresets_[FS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsTexCol); fsPresets_[FS_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsCol); + fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE] = CreateShader(this, ShaderStage::FRAGMENT, fsTexColRBSwizzle); - return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D]; + return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE]; } void DrawContext::DestroyPresets() { diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 91b1847f4b..fce59694b9 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -146,6 +146,7 @@ enum VertexShaderPreset : int { enum FragmentShaderPreset : int { FS_COLOR_2D, FS_TEXTURE_COLOR_2D, + FS_TEXTURE_COLOR_2D_RB_SWIZZLE, FS_MAX_PRESET, }; diff --git a/ext/native/thin3d/thin3d_d3d9.cpp b/ext/native/thin3d/thin3d_d3d9.cpp index cdd784b5f1..e9c2a8d171 100644 --- a/ext/native/thin3d/thin3d_d3d9.cpp +++ b/ext/native/thin3d/thin3d_d3d9.cpp @@ -348,6 +348,10 @@ bool D3D9Texture::Create(const TextureDesc &desc) { format_ = desc.format; tex_ = NULL; d3dfmt_ = FormatToD3DFMT(desc.format); + + if (d3dfmt_ == D3DFMT_UNKNOWN) { + return false; + } HRESULT hr = E_FAIL; D3DPOOL pool = D3DPOOL_MANAGED; @@ -424,6 +428,7 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d } break; case DataFormat::A4R4G4B4_UNORM_PACK16: + case DataFormat::A1R5G5B5_UNORM_PACK16: // Native memcpy(dest, source, width * sizeof(uint16_t)); break; @@ -437,6 +442,10 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d case DataFormat::B8G8R8A8_UNORM: memcpy(dest, source, sizeof(uint32_t) * width); break; + default: + // Unhandled data format copy. + DebugBreak(); + break; } } tex_->UnlockRect(level); From 4f7c23fe79aca5e6ea1b9f55c315b1a7c91ca573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 24 Oct 2019 01:29:24 +0200 Subject: [PATCH 09/18] DarkStalkers: Fix display on OpenGL ES. --- GPU/Software/SoftGpu.cpp | 16 +++++++++++---- Windows/GPU/WindowsVulkanContext.cpp | 1 + ext/native/base/display.cpp | 2 +- ext/native/math/lin/matrix4x4.h | 6 +++++- ext/native/thin3d/DataFormat.h | 1 + ext/native/thin3d/thin3d_gl.cpp | 30 +++++++++++++++++++++++----- ext/native/thin3d/thin3d_vulkan.cpp | 2 ++ 7 files changed, 47 insertions(+), 11 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index c468b45b65..0b715dd952 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -191,15 +191,23 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { Draw::Pipeline *pipeline = texColor; if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch) { u8 *data = Memory::GetPointer(0x04088000); - desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // The perfect one. + desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16; + } else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // RB swapped, compensate with a shader. + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + pipeline = texColorRBSwizzle; + } else { + // Shouldn't happen (once I'm done with the backends). + } desc.width = displayStride_ == 0 ? srcwidth : displayStride_; desc.height = srcheight; desc.initData.push_back(data); - u0 = 64.0f / 512.0f; - u1 = 448.0f / 512.0f; + u0 = 64.5f / 512.0f; + u1 = 447.5f / 512.0f; v1 = 16.0f / 272.0f; v0 = 240.0f / 272.0f; - pipeline = texColorRBSwizzle; g_DarkStalkerStretch = false; } else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { hasImage = false; diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index 09f9f16a08..7797b58289 100644 --- a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -116,6 +116,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m if (!g_Config.sVulkanDevice.empty()) g_Config.sVulkanDevice = g_Vulkan->GetPhysicalDeviceProperties(deviceNum).properties.deviceName; } + g_Vulkan->ChooseDevice(deviceNum); if (g_Vulkan->CreateDevice() != VK_SUCCESS) { *error_message = g_Vulkan->InitError(); diff --git a/ext/native/base/display.cpp b/ext/native/base/display.cpp index 3e912788db..c673639551 100644 --- a/ext/native/base/display.cpp +++ b/ext/native/base/display.cpp @@ -17,7 +17,7 @@ float pixel_in_dps_y = 1.0f; float display_hz = 60.0f; DisplayRotation g_display_rotation; -Lin::Matrix4x4 g_display_rot_matrix; +Lin::Matrix4x4 g_display_rot_matrix = Lin::Matrix4x4::identity(); template void RotateRectToDisplayImpl(DisplayRect &rect, T curRTWidth, T curRTHeight) { diff --git a/ext/native/math/lin/matrix4x4.h b/ext/native/math/lin/matrix4x4.h index c9464384ad..a9e312de70 100644 --- a/ext/native/math/lin/matrix4x4.h +++ b/ext/native/math/lin/matrix4x4.h @@ -57,7 +57,11 @@ public: empty(); xx=yy=zz=f; ww=1.0f; } - + static Matrix4x4 identity() { + Matrix4x4 id; + id.setIdentity(); + return id; + } void setIdentity() { setScaling(1.0f); } diff --git a/ext/native/thin3d/DataFormat.h b/ext/native/thin3d/DataFormat.h index 467f9e4a00..f1e1687630 100644 --- a/ext/native/thin3d/DataFormat.h +++ b/ext/native/thin3d/DataFormat.h @@ -30,6 +30,7 @@ enum class DataFormat : uint8_t { R5G5B5A1_UNORM_PACK16, // A1 in the LOWER bit B5G5R5A1_UNORM_PACK16, // A1 in the LOWER bit A1R5G5B5_UNORM_PACK16, // A1 in the UPPER bit. + A1B5G5R5_UNORM_PACK16, // A1 in the UPPER bit. OpenGL-only. R16_FLOAT, R16G16_FLOAT, diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index d31e63f892..9b164ec2ab 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -277,7 +277,7 @@ bool OpenGLShaderModule::Compile(GLRenderManager *render, ShaderLanguage languag class OpenGLInputLayout : public InputLayout { public: - OpenGLInputLayout(GLRenderManager *render) : render_(render), stride(0) {} + OpenGLInputLayout(GLRenderManager *render) : render_(render) {} ~OpenGLInputLayout(); void Compile(const InputLayoutDesc &desc); @@ -286,7 +286,7 @@ public: } GLRInputLayout *inputLayout_ = nullptr; - int stride; + int stride = 0; private: GLRenderManager *render_; }; @@ -718,6 +718,15 @@ public: FBColorDepth colorDepth = FBO_8888; }; +// TODO: SSE/NEON optimize, and move to ColorConv.cpp. +void MoveABit(u16 *dest, const u16 *src, size_t count) { + for (int i = 0; i < count; i++) { + u16 data = src[i]; + data = (data >> 15) | (data << 1); + dest[i] = data; + } +} + void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data) { if (width != width_ || height != height_ || depth != depth_) { // When switching to texStorage we need to handle this correctly. @@ -729,12 +738,20 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int if (stride == 0) stride = width; - size_t alignment = DataFormatSizeInBytes(format_); // Make a copy of data with stride eliminated. uint8_t *texData = new uint8_t[(size_t)(width * height * alignment)]; - for (int y = 0; y < height; y++) { - memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment); + + // Emulate support for DataFormat::A1R5G5B5_UNORM_PACK16. + if (format_ == DataFormat::A1R5G5B5_UNORM_PACK16) { + format_ = DataFormat::R5G5B5A1_UNORM_PACK16; + for (int y = 0; y < height; y++) { + MoveABit((u16 *)(texData + y * width * alignment), (const u16 *)(data + y * stride * alignment), width); + } + } else { + for (int y = 0; y < height; y++) { + memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment); + } } render_->TextureImage(tex_, level, width, height, format_, texData); } @@ -1220,6 +1237,9 @@ uint32_t OpenGLContext::GetDataFormatSupport(DataFormat fmt) const { case DataFormat::R8G8B8A8_UNORM: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS; + case DataFormat::A1R5G5B5_UNORM_PACK16: + return FMT_TEXTURE; // we will emulate this! Very fast to convert from R5G5B5A1_UNORM_PACK16 during upload. + case DataFormat::R32_FLOAT: case DataFormat::R32G32_FLOAT: case DataFormat::R32G32B32_FLOAT: diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 72c88feed3..cb2fc0d4e8 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -1371,6 +1371,8 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const { return 0; case DataFormat::A4R4G4B4_UNORM_PACK16: return 0; + case DataFormat::A1R5G5B5_UNORM_PACK16: + return FMT_RENDERTARGET | FMT_TEXTURE; case DataFormat::R8G8B8A8_UNORM: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT; From 290e9971a73c29a7033bf641f30ea1b9b0adaf3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 24 Oct 2019 23:52:55 +0200 Subject: [PATCH 10/18] More specialization work. --- GPU/Software/Clipper.cpp | 2 +- GPU/Software/Rasterizer.cpp | 100 +++++++++++++++++++++++++++--------- GPU/Software/Rasterizer.h | 2 +- 3 files changed, 77 insertions(+), 27 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index be9acb8c19..548d9e3dfd 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -208,7 +208,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) bool state_check = !gstate.isModeClear(); bool alpha_check = true; if ((coord_check || !gstate.isTextureMapEnabled()) && state_check && alpha_check) { - Rasterizer::DrawPSXSprite(v0, v1); + Rasterizer::DrawSprite(v0, v1); return; } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index cd90f89fac..72f4958c61 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1287,19 +1287,51 @@ void DrawTriangleSlice( } } -// Slow but can handle all input. -void SafeScanline(int s, int ds, int t, int x1, int x2, int y, int z, const u8 *texptr, int texbufw, Sampler::Funcs &sampler, Vec4 v0_color) { - for (int x = x1; x < x2; x++) { - Vec4 prim_color = v0_color; - Vec4 tex_color = Vec4::FromRGBA(sampler.nearest(s, t, texptr, texbufw, 0)); - prim_color = GetTextureFunctionOutput(prim_color, tex_color); - DrawingCoords pos(x, y, z); - DrawSinglePixel(pos, (u16)z, 1.0f, prim_color); - s += ds; + +// Through mode, with the specific Darkstalker settings. +inline void DrawSinglePixelFast(const DrawingCoords &p, const Vec4 &color_in) { + Vec4 prim_color = color_in.Clamp(0, 255); + if (gstate.isAlphaTestEnabled()) + if (!AlphaTestPassed(prim_color.a())) + return; + + const u32 old_color = GetPixelColor(p.x, p.y); + u32 new_color; + + u8 stencil = GetPixelStencil(p.x, p.y); + + // Dithering happens before the logic op and regardless of framebuffer format or clear mode. + // We do it while alpha blending because it happens before clamping. + if (gstate.isAlphaBlendEnabled()) { + const Vec4 dst = Vec4::FromRGBA(old_color); + Vec3 blended = AlphaBlendingResult(prim_color, dst); + if (gstate.isDitherEnabled()) { + blended += Vec3::AssignToAll(gstate.getDitherValue(p.x, p.y)); + } + + // ToRGB() always automatically clamps. + new_color = blended.ToRGB(); + new_color |= stencil << 24; + } else { + if (gstate.isDitherEnabled()) { + // We'll discard alpha anyway. + prim_color += Vec4::AssignToAll(gstate.getDitherValue(p.x, p.y)); + } + +#if defined(_M_SSE) + new_color = Vec3(prim_color.ivec).ToRGB(); + new_color |= stencil << 24; +#else + new_color = Vec4(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA(); +#endif } + + new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); + SetPixelColor(p.x, p.y, new_color); } -void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { + +void DrawSprite(const VertexData& v0, const VertexData& v1) { const u8 *texptr = nullptr; GETextureFormat texfmt = gstate.getTextureFormat(); @@ -1309,7 +1341,7 @@ void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { texptr = Memory::GetPointerUnchecked(texaddr); ScreenCoords pprime(v0.screenpos.x, v0.screenpos.y, 0); - Sampler::Funcs sampler = Sampler::GetFuncs(); + Sampler::NearestFunc nearestFunc = Sampler::GetNearestFunc(); // Looks at gstate. DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos); DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos); @@ -1347,21 +1379,39 @@ void DrawPSXSprite(const VertexData& v0, const VertexData& v1) { pos0.y = scissorTL.y; } - int t = t_start; - for (int y = pos0.y; y < pos1.y; y++) { - int s = s_start; - SafeScanline(s, ds, t, pos0.x, pos1.x, y, z, texptr, texbufw, sampler, v0.color0); - /* - for (int x = pos0.x; x < pos1.x; x++) { - Vec4 prim_color = v0.color0; - Vec4 tex_color = Vec4::FromRGBA(sampler.nearest(s, t, texptr, texbufw, 0)); - prim_color = GetTextureFunctionOutput(prim_color, tex_color); - DrawingCoords pos(x, y, z); - DrawSinglePixel(pos, (u16)z, fog, prim_color); - s += ds; + if (!gstate.isStencilTestEnabled() && + !gstate.isDepthTestEnabled() && + !gstate.isLogicOpEnabled() && + !gstate.isColorTestEnabled()) { + int t = t_start; + for (int y = pos0.y; y < pos1.y; y++) { + int s = s_start; + // Not really that fast but faster than triangle. + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); + prim_color = GetTextureFunctionOutput(prim_color, tex_color); + DrawingCoords pos(x, y, z); + DrawSinglePixelFast(pos, prim_color); + s += ds; + } + t += dt; + } + } else { + int t = t_start; + for (int y = pos0.y; y < pos1.y; y++) { + int s = s_start; + // Not really that fast but faster than triangle. + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); + prim_color = GetTextureFunctionOutput(prim_color, tex_color); + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, 1.0f, prim_color); + s += ds; + } + t += dt; } - */ - t += dt; } } else { if (pos1.x > scissorBR.x) pos1.x = scissorBR.x; diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 076a0421a5..df3075e3a0 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -27,7 +27,7 @@ namespace Rasterizer { void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2); void DrawPoint(const VertexData &v0); void DrawLine(const VertexData &v0, const VertexData &v1); -void DrawPSXSprite(const VertexData &v0, const VertexData &v1); +void DrawSprite(const VertexData &v0, const VertexData &v1); void ClearRectangle(const VertexData &v0, const VertexData &v1); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); From 714f83f614675f2c6ae69a66167d826d65d685bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 25 Oct 2019 00:14:58 +0200 Subject: [PATCH 11/18] Further specialization. --- GPU/Software/Rasterizer.cpp | 83 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 72f4958c61..c2d9b84a14 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1289,47 +1289,56 @@ void DrawTriangleSlice( // Through mode, with the specific Darkstalker settings. -inline void DrawSinglePixelFast(const DrawingCoords &p, const Vec4 &color_in) { - Vec4 prim_color = color_in.Clamp(0, 255); - if (gstate.isAlphaTestEnabled()) - if (!AlphaTestPassed(prim_color.a())) - return; +inline void DrawSinglePixel5551(const DrawingCoords &p, const Vec4 &color_in) { + Vec4 prim_color = color_in; + if (prim_color.a() == 0) + return; const u32 old_color = GetPixelColor(p.x, p.y); u32 new_color; u8 stencil = GetPixelStencil(p.x, p.y); - // Dithering happens before the logic op and regardless of framebuffer format or clear mode. - // We do it while alpha blending because it happens before clamping. - if (gstate.isAlphaBlendEnabled()) { - const Vec4 dst = Vec4::FromRGBA(old_color); - Vec3 blended = AlphaBlendingResult(prim_color, dst); - if (gstate.isDitherEnabled()) { - blended += Vec3::AssignToAll(gstate.getDitherValue(p.x, p.y)); - } - - // ToRGB() always automatically clamps. - new_color = blended.ToRGB(); - new_color |= stencil << 24; - } else { - if (gstate.isDitherEnabled()) { - // We'll discard alpha anyway. - prim_color += Vec4::AssignToAll(gstate.getDitherValue(p.x, p.y)); - } - -#if defined(_M_SSE) - new_color = Vec3(prim_color.ivec).ToRGB(); - new_color |= stencil << 24; -#else - new_color = Vec4(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA(); -#endif - } + const Vec4 dst = Vec4::FromRGBA(old_color); + Vec3 blended = AlphaBlendingResult(prim_color, dst); + // ToRGB() always automatically clamps. + new_color = blended.ToRGB(); + new_color |= stencil << 24; new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); SetPixelColor(p.x, p.y, new_color); } +static inline Vec4 ModulateRGBA(const Vec4& prim_color, const Vec4& texcolor) { + Vec3 out_rgb; + int out_a; + +#if defined(_M_SSE) + // We can be accurate up to 24 bit integers, should be enough. + const __m128 p = _mm_cvtepi32_ps(prim_color.ivec); + const __m128 t = _mm_cvtepi32_ps(texcolor.ivec); + const __m128 b = _mm_mul_ps(p, t); + if (gstate.isColorDoublingEnabled()) { + // We double right here, only for modulate. Other tex funcs do not color double. + const __m128 doubleColor = _mm_setr_ps(2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f, 1.0f / 255.0f); + out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, doubleColor)); + } else { + out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f))); + } + return Vec4(out_rgb.ivec); +#else + if (gstate.isColorDoublingEnabled()) { + out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255; + } else { + out_rgb = prim_color.rgb() * texcolor.rgb() / 255; + } + out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a(); +#endif + + return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); + +} + void DrawSprite(const VertexData& v0, const VertexData& v1) { const u8 *texptr = nullptr; @@ -1382,7 +1391,15 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { if (!gstate.isStencilTestEnabled() && !gstate.isDepthTestEnabled() && !gstate.isLogicOpEnabled() && - !gstate.isColorTestEnabled()) { + !gstate.isColorTestEnabled() && + !gstate.isDitherEnabled() && + gstate.isAlphaTestEnabled() && + gstate.getAlphaTestRef() == 0 && + gstate.getAlphaTestMask() == 0xFF && + gstate.isAlphaBlendEnabled() && + gstate.isTextureAlphaUsed() && + gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && + gstate.FrameBufFormat() == GE_FORMAT_5551) { int t = t_start; for (int y = pos0.y; y < pos1.y; y++) { int s = s_start; @@ -1390,9 +1407,9 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v0.color0; Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); - prim_color = GetTextureFunctionOutput(prim_color, tex_color); + prim_color = ModulateRGBA(prim_color, tex_color); DrawingCoords pos(x, y, z); - DrawSinglePixelFast(pos, prim_color); + DrawSinglePixel5551(pos, prim_color); s += ds; } t += dt; From bbbd7f8acc2b2875d021766fd37a396e90d6ccb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 25 Oct 2019 13:03:45 +0200 Subject: [PATCH 12/18] Buildfix --- .gitignore | 1 + GPU/Software/Rasterizer.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2beadcfc83..9187343927 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ build.ios versionname.txt versioncode.txt build*/ +android/.cxx # Temp file used by jenkins windows build (TODO: remove) desc.txt diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index c2d9b84a14..ee8dd8cf70 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1332,7 +1332,7 @@ static inline Vec4 ModulateRGBA(const Vec4& prim_color, const Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); From eb53609cb06ca355848dcdec20b7d98e08020c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 25 Oct 2019 23:02:53 +0200 Subject: [PATCH 13/18] More speed --- GPU/Software/Rasterizer.cpp | 89 +++++++++++++++++++++++++------------ GPU/Software/SoftGpu.h | 4 ++ 2 files changed, 65 insertions(+), 28 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index ee8dd8cf70..c789d5e1b7 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1287,26 +1287,26 @@ void DrawTriangleSlice( } } - // Through mode, with the specific Darkstalker settings. inline void DrawSinglePixel5551(const DrawingCoords &p, const Vec4 &color_in) { - Vec4 prim_color = color_in; - if (prim_color.a() == 0) + if (color_in.a() == 0) return; - const u32 old_color = GetPixelColor(p.x, p.y); + u16 *pixel = fb.Get16Ptr(p.x, p.y, gstate.FrameBufStride()); + u32 new_color; + if (color_in.a() == 255) { + const u32 old_color = RGBA5551ToRGBA8888(*pixel); + const Vec4 dst = Vec4::FromRGBA(old_color); + Vec3 blended = AlphaBlendingResult(color_in, dst); + // ToRGB() always automatically clamps. + new_color = blended.ToRGB(); + } else { + new_color = color_in.ToRGBA() & 0xFFFFFF; + } - u8 stencil = GetPixelStencil(p.x, p.y); - - const Vec4 dst = Vec4::FromRGBA(old_color); - Vec3 blended = AlphaBlendingResult(prim_color, dst); - - // ToRGB() always automatically clamps. - new_color = blended.ToRGB(); - new_color |= stencil << 24; - new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); - SetPixelColor(p.x, p.y, new_color); + new_color |= (*pixel & 0x8000) ? 0xff000000 : 0x00000000; + *pixel = RGBA8888ToRGBA5551(new_color); } static inline Vec4 ModulateRGBA(const Vec4& prim_color, const Vec4& texcolor) { @@ -1339,7 +1339,6 @@ static inline Vec4 ModulateRGBA(const Vec4& prim_color, const Vec4(255, 255, 255, 255); + if (gstate.isTextureMapEnabled()) { // 1:1 (but with mirror support) texture mapping! int s_start = v0.texturecoords.x; @@ -1399,18 +1400,27 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { gstate.isAlphaBlendEnabled() && gstate.isTextureAlphaUsed() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && + gstate.getColorMask() == 0x000000 && gstate.FrameBufFormat() == GE_FORMAT_5551) { int t = t_start; for (int y = pos0.y; y < pos1.y; y++) { int s = s_start; - // Not really that fast but faster than triangle. - for (int x = pos0.x; x < pos1.x; x++) { - Vec4 prim_color = v0.color0; - Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); - prim_color = ModulateRGBA(prim_color, tex_color); - DrawingCoords pos(x, y, z); - DrawSinglePixel5551(pos, prim_color); - s += ds; + if (isWhite) { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); + DrawingCoords pos(x, y, z); + DrawSinglePixel5551(pos, tex_color); + s += ds; + } + } else { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); + prim_color = ModulateRGBA(prim_color, tex_color); + DrawingCoords pos(x, y, z); + DrawSinglePixel5551(pos, prim_color); + s += ds; + } } t += dt; } @@ -1435,11 +1445,34 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { if (pos1.y > scissorBR.y) pos1.y = scissorBR.y; if (pos0.x < scissorTL.x) pos0.x = scissorTL.x; if (pos0.y < scissorTL.y) pos0.y = scissorTL.y; - for (int y = pos0.y; y < pos1.y; y++) { - for (int x = pos0.x; x < pos1.x; x++) { - Vec4 prim_color = v0.color0; - DrawingCoords pos(x, y, z); - DrawSinglePixel(pos, (u16)z, fog, prim_color); + if (!gstate.isStencilTestEnabled() && + !gstate.isDepthTestEnabled() && + !gstate.isLogicOpEnabled() && + !gstate.isColorTestEnabled() && + !gstate.isDitherEnabled() && + gstate.isAlphaTestEnabled() && + gstate.getAlphaTestRef() == 0 && + gstate.getAlphaTestMask() == 0xFF && + gstate.isAlphaBlendEnabled() && + gstate.isTextureAlphaUsed() && + gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && + gstate.getColorMask() == 0x000000 && + gstate.FrameBufFormat() == GE_FORMAT_5551) { + + for (int y = pos0.y; y < pos1.y; y++) { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + DrawingCoords pos(x, y, z); + DrawSinglePixel5551(pos, prim_color); + } + } + } else { + for (int y = pos0.y; y < pos1.y; y++) { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, fog, prim_color); + } } } } diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index c7d4f9365e..02660b5576 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -44,6 +44,10 @@ struct FormatBuffer { inline u32 Get32(int x, int y, int stride) { return as32[x + y * stride]; } + + inline u16 *Get16Ptr(int x, int y, int stride) { + return &as16[x + y * stride]; + } }; class SoftwareDrawEngine; From a84f4a0caa1ccc6104d2f25189c8a09a9a0487e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 25 Oct 2019 23:52:31 +0200 Subject: [PATCH 14/18] Even more speed. --- GPU/Software/Rasterizer.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index c789d5e1b7..b07d70c3fd 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1288,12 +1288,7 @@ void DrawTriangleSlice( } // Through mode, with the specific Darkstalker settings. -inline void DrawSinglePixel5551(const DrawingCoords &p, const Vec4 &color_in) { - if (color_in.a() == 0) - return; - - u16 *pixel = fb.Get16Ptr(p.x, p.y, gstate.FrameBufStride()); - +inline void DrawSinglePixel5551(u16 *pixel, const Vec4 &color_in) { u32 new_color; if (color_in.a() == 255) { const u32 old_color = RGBA5551ToRGBA8888(*pixel); @@ -1405,21 +1400,26 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { int t = t_start; for (int y = pos0.y; y < pos1.y; y++) { int s = s_start; + u16 *pixel = fb.Get16Ptr(pos0.x, y, gstate.FrameBufStride()); if (isWhite) { for (int x = pos0.x; x < pos1.x; x++) { - Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); - DrawingCoords pos(x, y, z); - DrawSinglePixel5551(pos, tex_color); + u32 tex_color = nearestFunc(s, t, texptr, texbufw, 0); + if (tex_color & 0xFF000000) { + DrawSinglePixel5551(pixel, Vec4::FromRGBA(tex_color)); + } s += ds; + pixel++; } } else { for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v0.color0; Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); prim_color = ModulateRGBA(prim_color, tex_color); - DrawingCoords pos(x, y, z); - DrawSinglePixel5551(pos, prim_color); + if (prim_color.a() > 0) { + DrawSinglePixel5551(pixel, prim_color); + } s += ds; + pixel++; } } t += dt; @@ -1458,12 +1458,15 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && gstate.getColorMask() == 0x000000 && gstate.FrameBufFormat() == GE_FORMAT_5551) { + if (v0.color0.a() == 0) + return; for (int y = pos0.y; y < pos1.y; y++) { + u16 *pixel = fb.Get16Ptr(pos0.x, y, gstate.FrameBufStride()); for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v0.color0; - DrawingCoords pos(x, y, z); - DrawSinglePixel5551(pos, prim_color); + DrawSinglePixel5551(pixel, prim_color); + pixel++; } } } else { From 102a70b4a583847b723ed2d66a1874891b743dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 26 Oct 2019 00:29:07 +0200 Subject: [PATCH 15/18] Scissor fix --- GPU/Software/Rasterizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index b07d70c3fd..fce5100af3 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1372,8 +1372,8 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { } // First clip the right and bottom sides, since we don't need to adjust the deltas. - if (pos1.x > scissorBR.x) pos1.x = scissorBR.x; - if (pos1.y > scissorBR.y) pos1.y = scissorBR.y; + if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1; + if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1; // Now clip the other sides. if (pos0.x < scissorTL.x) { s_start += (scissorTL.x - pos0.x) * ds; From 86c781e434f0ef75b5eef9e298208baa94a92208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 26 Oct 2019 00:30:19 +0200 Subject: [PATCH 16/18] Hack around most of the problems with the save/load dialog. Software stretch gets enabled in non-wide mode, so wallpapers work at a cost of speed. --- Core/HLE/sceUtility.cpp | 2 +- GPU/Software/Clipper.cpp | 44 +++++++++++++++++++++++----------------- GPU/Software/SoftGpu.cpp | 1 - 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/Core/HLE/sceUtility.cpp b/Core/HLE/sceUtility.cpp index 1a9c994701..3eb751436b 100644 --- a/Core/HLE/sceUtility.cpp +++ b/Core/HLE/sceUtility.cpp @@ -130,7 +130,7 @@ enum UtilityDialogType { // Only a single dialog is allowed at a time. static UtilityDialogType currentDialogType; -static bool currentDialogActive; +bool currentDialogActive; static PSPSaveDialog saveDialog; static PSPMsgDialog msgDialog; static PSPOskDialog oskDialog; diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 548d9e3dfd..6d22d2e7ee 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -28,6 +28,8 @@ extern bool g_DarkStalkerStretch; +// For Darkstalkers hack. Ugh. +extern bool currentDialogActive; namespace Clipper { @@ -139,8 +141,11 @@ static void RotateUVThrough(const VertexData &tl, const VertexData &br, VertexDa } } +bool needsClear = false; + void ProcessRect(const VertexData& v0, const VertexData& v1) { + g_DarkStalkerStretch = false; if (!gstate.isModeThrough()) { VertexData buf[4]; buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); @@ -191,23 +196,8 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) bool coord_check = (xdiff == udiff || xdiff == -udiff) && (ydiff == vdiff || ydiff == -vdiff); - // TODO: The U/V mirror support is off by one somehow. Predecrement? - - /* - bool state_check = - !gstate.isModeClear() && - !gstate.isFogEnabled() && - gstate.isTextureMapEnabled() && - !gstate.isDepthTestEnabled() && - !gstate.isStencilTestEnabled(); - bool alpha_check = - gstate.getAlphaTestFunction() == GEComparison::GE_COMP_GREATER && - gstate.getAlphaTestMask() == 0xFF && - gstate.getAlphaTestRef() == 0; - */ - bool state_check = !gstate.isModeClear(); - bool alpha_check = true; - if ((coord_check || !gstate.isTextureMapEnabled()) && state_check && alpha_check) { + bool state_check = !gstate.isModeClear(); // TODO: Add support for clear modes in Rasterizer::DrawSprite. + if ((coord_check || !gstate.isTextureMapEnabled()) && state_check) { Rasterizer::DrawSprite(v0, v1); return; } @@ -215,8 +205,24 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) // Eliminate the stretch blit in DarkStalkers. // We compensate for that when blitting the framebuffer in SoftGpu.cpp. if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) { - g_DarkStalkerStretch = true; - return; + if (v0.screenpos.x == 0x7100 && v0.screenpos.y == 0x7780 && v1.screenpos.x == 0x8f00 && v1.screenpos.y == 0x8880) { + // Also check for save/load dialog. + if (!currentDialogActive) { + g_DarkStalkerStretch = true; + if (needsClear) { + needsClear = false; + // Afterwards, we also need to clear the actual destination. Can do a fast rectfill. + gstate.textureMapEnable &= ~1; + VertexData newV0 = v0; + newV0.color0 = Vec4(0, 0, 0, 255); + Rasterizer::DrawSprite(newV0, v1); + gstate.textureMapEnable |= 1; + } + return; + } else { + needsClear = true; + } + } // else, handle the Capcom screen stretch, or the non-wide stretch? Or let's just not bother. } VertexData buf[4]; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 0b715dd952..1324380c8f 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -208,7 +208,6 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { u1 = 447.5f / 512.0f; v1 = 16.0f / 272.0f; v0 = 240.0f / 272.0f; - g_DarkStalkerStretch = false; } else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { hasImage = false; u1 = 1.0f; From 6c8186d046f6a478cfa3cc07a9c2671f07c1140e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 26 Oct 2019 00:46:56 +0200 Subject: [PATCH 17/18] Remove unused textureswizzle support (we use shaders instead). Universally support presenting 5551 format directly. --- GPU/Software/SoftGpu.cpp | 13 +++++++++---- ext/native/thin3d/thin3d.h | 7 ------- ext/native/thin3d/thin3d_vulkan.cpp | 14 +------------- 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 1324380c8f..7254c2ce11 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -198,8 +198,6 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { // RB swapped, compensate with a shader. desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; pipeline = texColorRBSwizzle; - } else { - // Shouldn't happen (once I'm done with the backends). } desc.width = displayStride_ == 0 ? srcwidth : displayStride_; desc.height = srcheight; @@ -217,10 +215,17 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { desc.height = srcheight; desc.initData.push_back(data); desc.format = Draw::DataFormat::R8G8B8A8_UNORM; - } else if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN && displayFormat_ == GE_FORMAT_5551) { + } else if (displayFormat_ == GE_FORMAT_5551) { u8 *data = Memory::GetPointer(displayFramebuf_); - desc.swizzle = Draw::TextureSwizzle::BGRA; desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // The perfect one. + desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16; + } else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // RB swapped, compensate with a shader. + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + pipeline = texColorRBSwizzle; + } desc.width = displayStride_ == 0 ? srcwidth : displayStride_; desc.height = srcheight; desc.initData.push_back(data); diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index fce59694b9..146ea138c0 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -516,16 +516,9 @@ struct DeviceCaps { std::string deviceName; // The device name to use when creating the thin3d context, to get the same one. }; -// Some predefined swizzle -enum class TextureSwizzle { - NO_SWIZZLE = 0, - BGRA = 1, -}; - struct TextureDesc { TextureType type; DataFormat format; - TextureSwizzle swizzle; int width; int height; int depth; diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index cb2fc0d4e8..3222abf2c0 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -700,19 +700,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur usageBits |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } - VkComponentMapping mapping{}; // Defaults to no swizzle - switch (desc.swizzle) { - case TextureSwizzle::NO_SWIZZLE: - break; - case TextureSwizzle::BGRA: - mapping.r = VK_COMPONENT_SWIZZLE_B; - mapping.g = VK_COMPONENT_SWIZZLE_G; - mapping.b = VK_COMPONENT_SWIZZLE_R; - mapping.a = VK_COMPONENT_SWIZZLE_A; - break; - } - - if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits, &mapping)) { + if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) { ELOG("Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_); return false; } From 1966c8fe75af8c2fa35e15dc46cdd1c0bc19855f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 26 Oct 2019 20:08:31 +0200 Subject: [PATCH 18/18] Fix a backwards check --- GPU/Software/Rasterizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index fce5100af3..71560776b8 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1291,13 +1291,13 @@ void DrawTriangleSlice( inline void DrawSinglePixel5551(u16 *pixel, const Vec4 &color_in) { u32 new_color; if (color_in.a() == 255) { + new_color = color_in.ToRGBA() & 0xFFFFFF; + } else { const u32 old_color = RGBA5551ToRGBA8888(*pixel); const Vec4 dst = Vec4::FromRGBA(old_color); Vec3 blended = AlphaBlendingResult(color_in, dst); // ToRGB() always automatically clamps. new_color = blended.ToRGB(); - } else { - new_color = color_in.ToRGBA() & 0xFFFFFF; } new_color |= (*pixel & 0x8000) ? 0xff000000 : 0x00000000;