Inline UpdateUVScaleOffset

This commit is contained in:
Henrik Rydgård 2023-12-01 23:54:34 +01:00
parent 5fccf64b94
commit e488189723
5 changed files with 39 additions and 27 deletions

View file

@ -1287,7 +1287,7 @@ void GPUCommon::FlushImm() {
immCount_ = 0;
return;
}
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
if (vfb) {
CheckDepthUsage(vfb);
}
@ -1933,7 +1933,7 @@ bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil f
}
// Refreshes the cached UV scale/offset values and then forwards the request to
// drawEngineCommon_->GetCurrentSimpleVertices(), returning its result unchanged.
// `count`, `vertices` and `indices` are passed straight through to the draw engine.
bool GPUCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
return drawEngineCommon_->GetCurrentSimpleVertices(count, vertices, indices);
}
@ -1942,18 +1942,3 @@ bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {
// which is owned by the drawengine.
return drawEngineCommon_->DescribeCodePtr(ptr, name);
}
// Recomputes gstate_c.uv (uScale, vScale, uOff, vOff) from the raw texture
// scale/offset registers gstate.texscaleu, texscalev, texoffsetu and texoffsetv.
//
// The SIMD paths read four consecutive 32-bit words starting at
// gstate.texscaleu, shift each one left by 8 bits, and write the four results
// over gstate_c.uv in a single store.
// NOTE(review): this presumes the four registers are laid out contiguously in
// GPUgstate in the same order as the fields of gstate_c.uv, and that the
// 8-bit left shift matches what getFloat24() produces - confirm against the
// struct definitions.
void GPUCommon::UpdateUVScaleOffset() {
#if defined(_M_SSE)
// Aligned 128-bit load: &gstate.texscaleu must be 16-byte aligned
// (NOTE(review): confirm GPUgstate guarantees this). The store is unaligned.
__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);
_mm_storeu_si128((__m128i *)&gstate_c.uv, values);
#elif PPSSPP_ARCH(ARM_NEON)
// Same operation with NEON: load four u32 lanes, shift each left by 8, store.
const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8);
vst1q_u32((u32 *)&gstate_c.uv, values);
#else
// Portable fallback: convert each register individually.
gstate_c.uv.uScale = getFloat24(gstate.texscaleu);
gstate_c.uv.vScale = getFloat24(gstate.texscalev);
gstate_c.uv.uOff = getFloat24(gstate.texoffsetu);
gstate_c.uv.vOff = getFloat24(gstate.texoffsetv);
#endif
}

View file

@ -204,8 +204,6 @@ public:
GPUgstate GetGState() override;
void SetCmdValue(u32 op) override;
void UpdateUVScaleOffset();
// Returns a pointer to the entry of `dls` at index `listid`.
// No range check on `listid` is performed in this method.
DisplayList* getList(int listid) override {
return &dls[listid];
}

View file

@ -982,7 +982,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
}
int bytesRead = 0;
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
// cull mode
int cullMode = gstate.getCullMode();
@ -1306,7 +1306,7 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
}
int bytesRead = 0;
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier");
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
@ -1380,7 +1380,7 @@ void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
}
int bytesRead = 0;
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline");
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);

View file

@ -17,12 +17,25 @@
#pragma once
#include "ppsspp_config.h"
#include "Common/CommonTypes.h"
#include "Common/Swap.h"
#include "GPU/GPU.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/ShaderCommon.h"
#if defined(_M_SSE)
#include <emmintrin.h>
#endif
#if PPSSPP_ARCH(ARM_NEON)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
class PointerWrap;
struct GPUgstate {
@ -523,6 +536,8 @@ enum class SubmitType {
HW_SPLINE,
};
extern GPUgstate gstate;
struct GPUStateCache {
bool Use(u32 flags) const { return (useFlags_ & flags) != 0; } // Return true if ANY of flags are true.
bool UseAll(u32 flags) const { return (useFlags_ & flags) == flags; } // Return true if ALL flags are true.
@ -604,6 +619,21 @@ struct GPUStateCache {
return useFlags_;
}
// Recomputes this cache's `uv` member (uScale, vScale, uOff, vOff) from the raw
// texture scale/offset registers gstate.texscaleu, texscalev, texoffsetu and
// texoffsetv of the global `gstate`.
//
// The SIMD paths read four consecutive 32-bit words starting at
// gstate.texscaleu, shift each one left by 8 bits, and write the four results
// over `uv` in a single store.
// NOTE(review): this presumes the four registers are laid out contiguously in
// GPUgstate in the same order as the fields of `uv`, and that the 8-bit left
// shift matches what getFloat24() produces - confirm against the struct
// definitions.
void UpdateUVScaleOffset() {
#if defined(_M_SSE)
// Aligned 128-bit load: &gstate.texscaleu must be 16-byte aligned
// (NOTE(review): confirm GPUgstate guarantees this). The store is unaligned.
__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);
_mm_storeu_si128((__m128i *)&uv, values);
#elif PPSSPP_ARCH(ARM_NEON)
// Same operation with NEON: load four u32 lanes, shift each left by 8, store.
const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8);
vst1q_u32((u32 *)&uv, values);
#else
// Portable fallback: convert each register individually.
uv.uScale = getFloat24(gstate.texscaleu);
uv.vScale = getFloat24(gstate.texscalev);
uv.uOff = getFloat24(gstate.texoffsetu);
uv.vOff = getFloat24(gstate.texoffsetv);
#endif
}
private:
u32 useFlags_;
public:
@ -690,7 +720,6 @@ public:
class GPUInterface;
class GPUDebugInterface;
extern GPUgstate gstate;
extern GPUStateCache gstate_c;
inline u32 GPUStateCache::getRelativeAddress(u32 data) const {

View file

@ -864,7 +864,7 @@ void SoftGPU::Execute_Prim(u32 op, u32 diff) {
cyclesExecuted += EstimatePerVertexCost() * count;
int bytesRead;
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
drawEngine_->transformUnit.SetDirty(dirtyFlags_);
drawEngine_->transformUnit.SubmitPrimitive(verts, indices, prim, count, gstate.vertType, &bytesRead, drawEngine_);
dirtyFlags_ = drawEngine_->transformUnit.GetDirty();
@ -917,7 +917,7 @@ void SoftGPU::Execute_Bezier(u32 op, u32 diff) {
SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType));
int bytesRead = 0;
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
drawEngine_->transformUnit.SetDirty(dirtyFlags_);
drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier");
dirtyFlags_ = drawEngine_->transformUnit.GetDirty();
@ -971,7 +971,7 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) {
SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType));
int bytesRead = 0;
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
drawEngine_->transformUnit.SetDirty(dirtyFlags_);
drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline");
dirtyFlags_ = drawEngine_->transformUnit.GetDirty();
@ -1445,7 +1445,7 @@ bool SoftGPU::GetCurrentClut(GPUDebugBuffer &buffer)
}
// Refreshes the cached UV scale/offset values and then forwards the request to
// drawEngine_->transformUnit.GetCurrentSimpleVertices(), returning its result
// unchanged. `count`, `vertices` and `indices` are passed straight through.
bool SoftGPU::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
UpdateUVScaleOffset();
gstate_c.UpdateUVScaleOffset();
return drawEngine_->transformUnit.GetCurrentSimpleVertices(count, vertices, indices);
}