From e488189723952af74cc00d948262c704c09ddbaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 1 Dec 2023 23:54:34 +0100 Subject: [PATCH] Inline UpdateUVScaleOffset --- GPU/GPUCommon.cpp | 19 ++----------------- GPU/GPUCommon.h | 2 -- GPU/GPUCommonHW.cpp | 6 +++--- GPU/GPUState.h | 31 ++++++++++++++++++++++++++++++- GPU/Software/SoftGpu.cpp | 8 ++++---- 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 8b23ec1e98..08ad845e05 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1287,7 +1287,7 @@ void GPUCommon::FlushImm() { immCount_ = 0; return; } - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); if (vfb) { CheckDepthUsage(vfb); } @@ -1933,7 +1933,7 @@ bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil f } bool GPUCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) { - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); return drawEngineCommon_->GetCurrentSimpleVertices(count, vertices, indices); } @@ -1942,18 +1942,3 @@ bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) { // which is owned by the drawengine. 
return drawEngineCommon_->DescribeCodePtr(ptr, name); } - -void GPUCommon::UpdateUVScaleOffset() { -#if defined(_M_SSE) - __m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8); - _mm_storeu_si128((__m128i *)&gstate_c.uv, values); -#elif PPSSPP_ARCH(ARM_NEON) - const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8); - vst1q_u32((u32 *)&gstate_c.uv, values); -#else - gstate_c.uv.uScale = getFloat24(gstate.texscaleu); - gstate_c.uv.vScale = getFloat24(gstate.texscalev); - gstate_c.uv.uOff = getFloat24(gstate.texoffsetu); - gstate_c.uv.vOff = getFloat24(gstate.texoffsetv); -#endif -} diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 0bc8d5f5a4..ede8d1ec73 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -204,8 +204,6 @@ public: GPUgstate GetGState() override; void SetCmdValue(u32 op) override; - void UpdateUVScaleOffset(); - DisplayList* getList(int listid) override { return &dls[listid]; } diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index f57250d5e2..2c1eba7f4d 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -982,7 +982,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { } int bytesRead = 0; - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); // cull mode int cullMode = gstate.getCullMode(); @@ -1306,7 +1306,7 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) { } int bytesRead = 0; - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier"); gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET); @@ -1380,7 +1380,7 @@ void GPUCommonHW::Execute_Spline(u32 op, u32 diff) { } int bytesRead = 0; - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline"); gstate_c.Dirty(DIRTY_RASTER_STATE | 
DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 6b1446c5e8..e8d77494a5 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -17,12 +17,25 @@ #pragma once +#include "ppsspp_config.h" + #include "Common/CommonTypes.h" #include "Common/Swap.h" #include "GPU/GPU.h" #include "GPU/ge_constants.h" #include "GPU/Common/ShaderCommon.h" +#if defined(_M_SSE) +#include <emmintrin.h> +#endif +#if PPSSPP_ARCH(ARM_NEON) +#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64) +#include <arm64_neon.h> +#else +#include <arm_neon.h> +#endif +#endif + class PointerWrap; struct GPUgstate { @@ -523,6 +536,8 @@ enum class SubmitType { HW_SPLINE, }; +extern GPUgstate gstate; + struct GPUStateCache { bool Use(u32 flags) const { return (useFlags_ & flags) != 0; } // Return true if ANY of flags are true. bool UseAll(u32 flags) const { return (useFlags_ & flags) == flags; } // Return true if ALL flags are true. @@ -604,6 +619,21 @@ struct GPUStateCache { return useFlags_; } + void UpdateUVScaleOffset() { +#if defined(_M_SSE) + __m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8); + _mm_storeu_si128((__m128i *)&uv, values); +#elif PPSSPP_ARCH(ARM_NEON) + const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8); + vst1q_u32((u32 *)&uv, values); +#else + uv.uScale = getFloat24(gstate.texscaleu); + uv.vScale = getFloat24(gstate.texscalev); + uv.uOff = getFloat24(gstate.texoffsetu); + uv.vOff = getFloat24(gstate.texoffsetv); +#endif + } + private: u32 useFlags_; public: @@ -690,7 +720,6 @@ public: class GPUInterface; class GPUDebugInterface; -extern GPUgstate gstate; extern GPUStateCache gstate_c; inline u32 GPUStateCache::getRelativeAddress(u32 data) const { diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 2ca6fa8d8a..d02049d740 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -864,7 +864,7 @@ void SoftGPU::Execute_Prim(u32 op, u32 diff) { cyclesExecuted += 
EstimatePerVertexCost() * count; int bytesRead; - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); drawEngine_->transformUnit.SetDirty(dirtyFlags_); drawEngine_->transformUnit.SubmitPrimitive(verts, indices, prim, count, gstate.vertType, &bytesRead, drawEngine_); dirtyFlags_ = drawEngine_->transformUnit.GetDirty(); @@ -917,7 +917,7 @@ void SoftGPU::Execute_Bezier(u32 op, u32 diff) { SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType)); int bytesRead = 0; - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); drawEngine_->transformUnit.SetDirty(dirtyFlags_); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier"); dirtyFlags_ = drawEngine_->transformUnit.GetDirty(); @@ -971,7 +971,7 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) { SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType)); int bytesRead = 0; - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); drawEngine_->transformUnit.SetDirty(dirtyFlags_); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline"); dirtyFlags_ = drawEngine_->transformUnit.GetDirty(); @@ -1445,7 +1445,7 @@ bool SoftGPU::GetCurrentClut(GPUDebugBuffer &buffer) } bool SoftGPU::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) { - UpdateUVScaleOffset(); + gstate_c.UpdateUVScaleOffset(); return drawEngine_->transformUnit.GetCurrentSimpleVertices(count, vertices, indices); }