mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #19789 from hrydgard/depth-raster-low-quality
Implement the low-quality depth raster mode, default to it on Android/iOS
This commit is contained in:
commit
f88b6abb5d
5 changed files with 69 additions and 19 deletions
|
@ -160,7 +160,7 @@ struct Vec4S32 {
|
|||
// Named-method form of operator*: returns the result of (*this * other).
Vec4S32 Mul(Vec4S32 other) const { return *this * other; }
|
||||
|
||||
template<int imm>
|
||||
Vec4S32 Shl() const { return Vec4S32{ _mm_slli_epi32(v, imm) }; }
|
||||
Vec4S32 Shl() const { return Vec4S32{ imm == 0 ? v : _mm_slli_epi32(v, imm) }; }
|
||||
|
||||
// NOTE: May be slow. Reads element `index` by viewing the storage of v as an
// array of int; index is not range-checked here.
|
||||
int operator[](size_t index) const { return ((int *)&v)[index]; }
|
||||
|
|
|
@ -139,6 +139,14 @@ const char *DefaultLangRegion() {
|
|||
return defaultLangRegion.c_str();
|
||||
}
|
||||
|
||||
// Supplies the default for the "DepthRasterMode" config setting (passed by
// address to its ConfigSetting entry). Returns DepthRasterMode::LOW_QUALITY on
// Android and iOS builds and DepthRasterMode::DEFAULT on every other platform,
// cast to int in both cases. The choice is made at compile time by the
// preprocessor, so only one of the two returns is ever compiled in.
static int DefaultDepthRaster() {
|
||||
#if PPSSPP_PLATFORM(ANDROID) || PPSSPP_PLATFORM(IOS)
|
||||
return (int)DepthRasterMode::LOW_QUALITY;
|
||||
#else
|
||||
return (int)DepthRasterMode::DEFAULT;
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string CreateRandMAC() {
|
||||
std::stringstream randStream;
|
||||
srand(time(nullptr));
|
||||
|
@ -616,7 +624,7 @@ static const ConfigSetting graphicsSettings[] = {
|
|||
ConfigSetting("UseGeometryShader", &g_Config.bUseGeometryShader, false, CfgFlag::PER_GAME),
|
||||
ConfigSetting("SkipBufferEffects", &g_Config.bSkipBufferEffects, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
|
||||
ConfigSetting("DisableRangeCulling", &g_Config.bDisableRangeCulling, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
|
||||
ConfigSetting("DepthRasterMode", &g_Config.iDepthRasterMode, 0, CfgFlag::PER_GAME | CfgFlag::REPORT),
|
||||
ConfigSetting("DepthRasterMode", &g_Config.iDepthRasterMode, &DefaultDepthRaster, CfgFlag::PER_GAME | CfgFlag::REPORT),
|
||||
ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, CfgFlag::PER_GAME),
|
||||
ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, CfgFlag::PER_GAME),
|
||||
ConfigSetting("HardwareTransform", &g_Config.bHardwareTransform, true, CfgFlag::PER_GAME | CfgFlag::REPORT),
|
||||
|
|
|
@ -87,12 +87,6 @@ static void DepthRasterRect(uint16_t *dest, int stride, const DepthScissor sciss
|
|||
|
||||
alignas(16) static const int zero123[4] = {0, 1, 2, 3};
|
||||
|
||||
constexpr int stepXSize = 4;
|
||||
constexpr int stepYSize = 1;
|
||||
|
||||
constexpr int stepXShift = 2;
|
||||
constexpr int stepYShift = 0;
|
||||
|
||||
enum class TriangleStat {
|
||||
OK,
|
||||
NoPixels,
|
||||
|
@ -102,7 +96,7 @@ enum class TriangleStat {
|
|||
constexpr int MIN_TWICE_TRI_AREA = 10;
|
||||
|
||||
// A mix of ideas from Intel's sample and ryg's rasterizer blog series.
|
||||
template<ZCompareMode compareMode>
|
||||
template<ZCompareMode compareMode, bool lowQ>
|
||||
void DepthRaster4Triangles(int stats[3], uint16_t *depthBuf, int stride, DepthScissor scissor, const int *tx, const int *ty, const float *tz) {
|
||||
// Triangle setup. This is done using SIMD, four triangles at a time.
|
||||
// 16x16->32 multiplications are doable on SSE2, which should be all we need.
|
||||
|
@ -117,6 +111,12 @@ void DepthRaster4Triangles(int stats[3], uint16_t *depthBuf, int stride, DepthSc
|
|||
Vec4S32 x2 = Vec4S32::LoadAligned(tx + 8);
|
||||
Vec4S32 y2 = Vec4S32::LoadAligned(ty + 8);
|
||||
|
||||
if (lowQ) {
|
||||
y0 &= Vec4S32::Splat(~1);
|
||||
y1 &= Vec4S32::Splat(~1);
|
||||
y2 &= Vec4S32::Splat(~1);
|
||||
}
|
||||
|
||||
// FixupAfterMinMax is just 16->32 sign extension, in case the current platform (like SSE2) just has 16-bit min/max operations.
|
||||
Vec4S32 minX = x0.Min16(x1).Min16(x2).Max16(Vec4S32::Splat(scissor.x1)).FixupAfterMinMax();
|
||||
Vec4S32 maxX = x0.Max16(x1).Max16(x2).Min16(Vec4S32::Splat(scissor.x2)).FixupAfterMinMax();
|
||||
|
@ -138,6 +138,12 @@ void DepthRaster4Triangles(int stats[3], uint16_t *depthBuf, int stride, DepthSc
|
|||
Vec4S32 B01 = x1 - x0;
|
||||
Vec4S32 C01 = x0.Mul16(y1) - y0.Mul16(x1);
|
||||
|
||||
constexpr int stepXSize = 4;
|
||||
constexpr int stepYSize = lowQ ? 2 : 1;
|
||||
|
||||
constexpr int stepXShift = 2;
|
||||
constexpr int stepYShift = lowQ ? 1 : 0;
|
||||
|
||||
// Step deltas
|
||||
Vec4S32 stepX12 = A12.Shl<stepXShift>();
|
||||
Vec4S32 stepY12 = B12.Shl<stepYShift>();
|
||||
|
@ -229,22 +235,27 @@ void DepthRaster4Triangles(int stats[3], uint16_t *depthBuf, int stride, DepthSc
|
|||
Vec4U16 shortZ = Vec4U16::FromVec4F32(zs);
|
||||
|
||||
// This switch is on a templated constant, so should collapse away.
|
||||
Vec4U16 writeVal;
|
||||
switch (compareMode) {
|
||||
case ZCompareMode::Greater:
|
||||
// To implement the greater/greater-than comparison, we can combine mask and max.
|
||||
// Unfortunately there's no unsigned max on SSE2, it's synthesized by xoring 0x8000 on input and output.
|
||||
// We use AndNot to zero out Z results, before doing Max with the buffer.
|
||||
shortZ.AndNot(shortMaskInv).Max(bufferValues).Store(rowPtr + x);
|
||||
writeVal = shortZ.AndNot(shortMaskInv).Max(bufferValues);
|
||||
break;
|
||||
case ZCompareMode::Less:
|
||||
// This time, we OR the mask and use .Min.
|
||||
(shortZ | shortMaskInv).Min(bufferValues).Store(rowPtr + x);
|
||||
writeVal = (shortZ | shortMaskInv).Min(bufferValues);
|
||||
break;
|
||||
case ZCompareMode::Always: // UNTESTED
|
||||
// This could be replaced with a vblend operation.
|
||||
((bufferValues & shortMaskInv) | shortZ.AndNot(shortMaskInv)).Store(rowPtr + x);
|
||||
writeVal = ((bufferValues & shortMaskInv) | shortZ.AndNot(shortMaskInv));
|
||||
break;
|
||||
}
|
||||
writeVal.Store(rowPtr + x);
|
||||
if (lowQ) {
|
||||
writeVal.Store(rowPtr + stride + x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -544,7 +555,7 @@ int DepthRasterClipIndexedTriangles(int *tx, int *ty, float *tz, const float *tr
|
|||
}
|
||||
|
||||
// Rasterizes screen-space vertices.
|
||||
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, const int *tx, const int *ty, const float *tz, int count, const DepthDraw &draw, const DepthScissor scissor) {
|
||||
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, const int *tx, const int *ty, const float *tz, int count, const DepthDraw &draw, const DepthScissor scissor, bool lowQ) {
|
||||
// Prim should now be either TRIANGLES or RECTs.
|
||||
_dbg_assert_(draw.prim == GE_PRIM_RECTANGLES || draw.prim == GE_PRIM_TRIANGLES);
|
||||
|
||||
|
@ -562,21 +573,51 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, const int *tx, con
|
|||
{
|
||||
int stats[3]{};
|
||||
// Batches of 4 triangles, as output by the clip function.
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
if (lowQ) {
|
||||
switch (draw.compareMode) {
|
||||
case ZCompareMode::Greater:
|
||||
{
|
||||
DepthRaster4Triangles<ZCompareMode::Greater>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
DepthRaster4Triangles<ZCompareMode::Greater, true>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ZCompareMode::Less:
|
||||
{
|
||||
DepthRaster4Triangles<ZCompareMode::Less>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
DepthRaster4Triangles<ZCompareMode::Less, true>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ZCompareMode::Always:
|
||||
{
|
||||
DepthRaster4Triangles<ZCompareMode::Always>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
DepthRaster4Triangles<ZCompareMode::Always, true>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
switch (draw.compareMode) {
|
||||
case ZCompareMode::Greater:
|
||||
{
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
DepthRaster4Triangles<ZCompareMode::Greater, false>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ZCompareMode::Less:
|
||||
{
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
DepthRaster4Triangles<ZCompareMode::Less, false>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ZCompareMode::Always:
|
||||
{
|
||||
for (int i = 0; i < count; i += 12) {
|
||||
DepthRaster4Triangles<ZCompareMode::Always, false>(stats, depth, depthStride, scissor, &tx[i], &ty[i], &tz[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,4 +56,4 @@ int DepthRasterClipIndexedRectangles(int *tx, int *ty, float *tz, const float *t
|
|||
void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID);
|
||||
void TransformPredecodedForDepthRaster(float *dest, const float *worldviewproj, const void *decodedVertexData, VertexDecoder *dec, int count);
|
||||
void ConvertPredecodedThroughForDepthRaster(float *dest, const void *decodedVertexData, VertexDecoder *dec, int count);
|
||||
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, const int *tx, const int *ty, const float *tz, int count, const DepthDraw &draw, const DepthScissor scissor);
|
||||
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, const int *tx, const int *ty, const float *tz, int count, const DepthDraw &draw, const DepthScissor scissor, bool lowQ);
|
||||
|
|
|
@ -1098,6 +1098,7 @@ void DrawEngineCommon::FlushQueuedDepth() {
|
|||
}
|
||||
|
||||
const bool collectStats = coreCollectDebugStats;
|
||||
const bool lowQ = g_Config.iDepthRasterMode == (int)DepthRasterMode::LOW_QUALITY;
|
||||
|
||||
for (const auto &draw : depthDraws_) {
|
||||
int *tx = depthScreenVerts_;
|
||||
|
@ -1127,7 +1128,7 @@ void DrawEngineCommon::FlushQueuedDepth() {
|
|||
}
|
||||
{
|
||||
TimeCollector collectStat(&gpuStats.msRasterizeDepth, collectStats);
|
||||
DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(draw.depthAddr), draw.depthStride, tx, ty, tz, outVertCount, draw, tileScissor);
|
||||
DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(draw.depthAddr), draw.depthStride, tx, ty, tz, outVertCount, draw, tileScissor, lowQ);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue