mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-04-02 10:52:54 -04:00
This pull request is for the pending reorganization of the folder structure on GSdx, making it better organized and easier to work with. Also remove unused GSTextureFX.cpp file.
1215 lines
26 KiB
C++
1215 lines
26 KiB
C++
/*
|
|
* Copyright (C) 2007-2009 Gabest
|
|
* http://www.gabest.org
|
|
*
|
|
* This Program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This Program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with GNU Make; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
*/
|
|
|
|
// TODO: JIT Draw* (flags: depth, texture, color (+iip), scissor)
|
|
|
|
#include "stdafx.h"
|
|
#include "GSRasterizer.h"
|
|
|
|
int GSRasterizerData::s_counter = 0;
|
|
|
|
static int compute_best_thread_height(int threads) {
|
|
// - for more threads screen segments should be smaller to better distribute the pixels
|
|
// - but not too small to keep the threading overhead low
|
|
// - ideal value between 3 and 5, or log2(64 / number of threads)
|
|
|
|
int th = theApp.GetConfigI("extrathreads_height");
|
|
|
|
if (th > 0 && th < 9)
|
|
return th;
|
|
else
|
|
return 4;
|
|
}
|
|
|
|
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
|
|
: m_perfmon(perfmon)
|
|
, m_ds(ds)
|
|
, m_id(id)
|
|
, m_threads(threads)
|
|
{
|
|
memset(&m_pixels, 0, sizeof(m_pixels));
|
|
|
|
m_thread_height = compute_best_thread_height(threads);
|
|
|
|
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
|
|
m_edge.count = 0;
|
|
|
|
int rows = (2048 >> m_thread_height) + 16;
|
|
m_scanline = (uint8*)_aligned_malloc(rows, 64);
|
|
|
|
int row = 0;
|
|
|
|
while(row < rows)
|
|
{
|
|
for(int i = 0; i < threads; i++, row++)
|
|
{
|
|
m_scanline[row] = i == id ? 1 : 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
GSRasterizer::~GSRasterizer()
|
|
{
|
|
_aligned_free(m_scanline);
|
|
|
|
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
|
|
|
|
delete m_ds;
|
|
}
|
|
|
|
bool GSRasterizer::IsOneOfMyScanlines(int top) const
|
|
{
|
|
ASSERT(top >= 0 && top < 2048);
|
|
|
|
return m_scanline[top >> m_thread_height] != 0;
|
|
}
|
|
|
|
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
|
|
{
|
|
ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048);
|
|
|
|
top = top >> m_thread_height;
|
|
bottom = (bottom + (1 << m_thread_height) - 1) >> m_thread_height;
|
|
|
|
while(top < bottom)
|
|
{
|
|
if(m_scanline[top++])
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
int GSRasterizer::FindMyNextScanline(int top) const
|
|
{
|
|
int i = top >> m_thread_height;
|
|
|
|
if(m_scanline[i] == 0)
|
|
{
|
|
while(m_scanline[++i] == 0);
|
|
|
|
top = i << m_thread_height;
|
|
}
|
|
|
|
return top;
|
|
}
|
|
|
|
void GSRasterizer::Queue(const std::shared_ptr<GSRasterizerData>& data)
|
|
{
|
|
Draw(data.get());
|
|
}
|
|
|
|
int GSRasterizer::GetPixels(bool reset)
|
|
{
|
|
int pixels = m_pixels.sum;
|
|
|
|
if(reset)
|
|
{
|
|
m_pixels.sum = 0;
|
|
}
|
|
|
|
return pixels;
|
|
}
|
|
|
|
void GSRasterizer::Draw(GSRasterizerData* data)
|
|
{
|
|
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
|
|
|
|
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
|
|
|
m_pixels.actual = 0;
|
|
m_pixels.total = 0;
|
|
|
|
data->start = __rdtsc();
|
|
|
|
m_ds->BeginDraw(data);
|
|
|
|
const GSVertexSW* vertex = data->vertex;
|
|
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
|
|
|
|
const uint32* index = data->index;
|
|
const uint32* index_end = data->index + data->index_count;
|
|
|
|
uint32 tmp_index[] = {0, 1, 2};
|
|
|
|
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
|
|
|
|
m_scissor = data->scissor;
|
|
m_fscissor_x = GSVector4(data->scissor).xzxz();
|
|
m_fscissor_y = GSVector4(data->scissor).ywyw();
|
|
|
|
switch(data->primclass)
|
|
{
|
|
case GS_POINT_CLASS:
|
|
|
|
if(scissor_test)
|
|
{
|
|
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
|
|
}
|
|
else
|
|
{
|
|
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
|
|
}
|
|
|
|
break;
|
|
|
|
case GS_LINE_CLASS:
|
|
|
|
if(index != NULL)
|
|
{
|
|
do {DrawLine(vertex, index); index += 2;}
|
|
while(index < index_end);
|
|
}
|
|
else
|
|
{
|
|
do {DrawLine(vertex, tmp_index); vertex += 2;}
|
|
while(vertex < vertex_end);
|
|
}
|
|
|
|
break;
|
|
|
|
case GS_TRIANGLE_CLASS:
|
|
|
|
if(index != NULL)
|
|
{
|
|
do {DrawTriangle(vertex, index); index += 3;}
|
|
while(index < index_end);
|
|
}
|
|
else
|
|
{
|
|
do {DrawTriangle(vertex, tmp_index); vertex += 3;}
|
|
while(vertex < vertex_end);
|
|
}
|
|
|
|
break;
|
|
|
|
case GS_SPRITE_CLASS:
|
|
|
|
if(index != NULL)
|
|
{
|
|
do {DrawSprite(vertex, index); index += 2;}
|
|
while(index < index_end);
|
|
}
|
|
else
|
|
{
|
|
do {DrawSprite(vertex, tmp_index); vertex += 2;}
|
|
while(vertex < vertex_end);
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
__assume(0);
|
|
}
|
|
|
|
#if _M_SSE >= 0x501
|
|
_mm256_zeroupper();
|
|
#endif
|
|
|
|
data->pixels = m_pixels.actual;
|
|
|
|
uint64 ticks = __rdtsc() - data->start;
|
|
|
|
m_pixels.sum += m_pixels.actual;
|
|
|
|
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total);
|
|
}
|
|
|
|
template<bool scissor_test>
|
|
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
|
|
{
|
|
if(index != NULL)
|
|
{
|
|
for(int i = 0; i < index_count; i++, index++)
|
|
{
|
|
const GSVertexSW& v = vertex[*index];
|
|
|
|
GSVector4i p(v.p);
|
|
|
|
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
|
{
|
|
if(IsOneOfMyScanlines(p.y))
|
|
{
|
|
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
|
|
|
|
DrawScanline(1, p.x, p.y, v);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uint32 tmp_index[1] = {0};
|
|
|
|
for(int i = 0; i < vertex_count; i++, vertex++)
|
|
{
|
|
const GSVertexSW& v = vertex[0];
|
|
|
|
GSVector4i p(v.p);
|
|
|
|
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
|
{
|
|
if(IsOneOfMyScanlines(p.y))
|
|
{
|
|
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
|
|
|
|
DrawScanline(1, p.x, p.y, v);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
|
|
{
|
|
const GSVertexSW& v0 = vertex[index[0]];
|
|
const GSVertexSW& v1 = vertex[index[1]];
|
|
|
|
GSVertexSW dv = v1 - v0;
|
|
|
|
GSVector4 dp = dv.p.abs();
|
|
|
|
int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy|
|
|
|
|
if(m_ds->HasEdge())
|
|
{
|
|
DrawEdge(v0, v1, dv, i, 0);
|
|
DrawEdge(v0, v1, dv, i, 1);
|
|
|
|
Flush(vertex, index, GSVertexSW::zero(), true);
|
|
|
|
return;
|
|
}
|
|
|
|
GSVector4i dpi(dp);
|
|
|
|
if(dpi.y == 0)
|
|
{
|
|
if(dpi.x > 0)
|
|
{
|
|
// shortcut for horizontal lines
|
|
|
|
GSVector4 mask = (v0.p > v1.p).xxxx();
|
|
|
|
GSVertexSW scan;
|
|
|
|
scan.p = v0.p.blend32(v1.p, mask);
|
|
scan.t = v0.t.blend32(v1.t, mask);
|
|
scan.c = v0.c.blend32(v1.c, mask);
|
|
|
|
GSVector4i p(scan.p);
|
|
|
|
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
|
|
{
|
|
GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil();
|
|
GSVector4 l = lrf.max(m_fscissor_x);
|
|
GSVector4 r = lrf.min(m_fscissor_x);
|
|
GSVector4i lr = GSVector4i(l.xxyy(r));
|
|
|
|
int left = lr.extract32<0>();
|
|
int right = lr.extract32<2>();
|
|
|
|
int pixels = right - left;
|
|
|
|
if(pixels > 0)
|
|
{
|
|
GSVertexSW dscan = dv / dv.p.xxxx();
|
|
|
|
scan += dscan * (l - scan.p).xxxx();
|
|
|
|
m_ds->SetupPrim(vertex, index, dscan);
|
|
|
|
DrawScanline(pixels, left, p.y, scan);
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
int steps = dpi.v[i];
|
|
|
|
if(steps > 0)
|
|
{
|
|
GSVertexSW edge = v0;
|
|
GSVertexSW dedge = dv / GSVector4(dp.v[i]);
|
|
|
|
GSVertexSW* RESTRICT e = m_edge.buff;
|
|
|
|
while(1)
|
|
{
|
|
GSVector4i p(edge.p);
|
|
|
|
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
|
{
|
|
if(IsOneOfMyScanlines(p.y))
|
|
{
|
|
AddScanline(e, 1, p.x, p.y, edge);
|
|
|
|
e++;
|
|
}
|
|
}
|
|
|
|
if(--steps == 0) break;
|
|
|
|
edge += dedge;
|
|
}
|
|
|
|
m_edge.count = e - m_edge.buff;
|
|
|
|
Flush(vertex, index, GSVertexSW::zero());
|
|
}
|
|
}
|
|
|
|
static const uint8 s_ysort[8][4] =
|
|
{
|
|
{0, 1, 2, 0}, // y0 <= y1 <= y2
|
|
{1, 0, 2, 0}, // y1 < y0 <= y2
|
|
{0, 0, 0, 0},
|
|
{1, 2, 0, 0}, // y1 <= y2 < y0
|
|
{0, 2, 1, 0}, // y0 <= y2 < y1
|
|
{0, 0, 0, 0},
|
|
{2, 0, 1, 0}, // y2 < y0 <= y1
|
|
{2, 1, 0, 0}, // y2 < y1 < y0
|
|
};
|
|
|
|
#if _M_SSE >= 0x501
|
|
|
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
|
{
|
|
GSVertexSW2 dv[3];
|
|
GSVertexSW2 edge;
|
|
GSVertexSW2 dedge;
|
|
GSVertexSW2 dscan;
|
|
|
|
GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p);
|
|
GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx();
|
|
|
|
int m1 = (y0011 > y1221).mask() & 7;
|
|
|
|
int i[3];
|
|
|
|
i[0] = index[s_ysort[m1][0]];
|
|
i[1] = index[s_ysort[m1][1]];
|
|
i[2] = index[s_ysort[m1][2]];
|
|
|
|
const GSVertexSW2* _v = (const GSVertexSW2*)vertex;
|
|
|
|
const GSVertexSW2& v0 = _v[i[0]];
|
|
const GSVertexSW2& v1 = _v[i[1]];
|
|
const GSVertexSW2& v2 = _v[i[2]];
|
|
|
|
y0011 = v0.p.yyyy(v1.p);
|
|
y1221 = v1.p.yyyy(v2.p).xzzx();
|
|
|
|
m1 = (y0011 == y1221).mask() & 7;
|
|
|
|
// if(i == 0) => y0 < y1 < y2
|
|
// if(i == 1) => y0 == y1 < y2
|
|
// if(i == 4) => y0 < y1 == y2
|
|
|
|
if(m1 == 7) return; // y0 == y1 == y2
|
|
|
|
GSVector4 tbf = y0011.xzxz(y1221).ceil();
|
|
GSVector4 tbmax = tbf.max(m_fscissor_y);
|
|
GSVector4 tbmin = tbf.min(m_fscissor_y);
|
|
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b)
|
|
|
|
dv[0] = v1 - v0;
|
|
dv[1] = v2 - v0;
|
|
dv[2] = v2 - v1;
|
|
|
|
GSVector4 cross = dv[0].p * dv[1].p.yxwz();
|
|
|
|
cross = (cross - cross.yxwz()).yyyy(); // select the second component, the negated cross product
|
|
|
|
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
|
|
|
|
int m2 = cross.upl(cross == GSVector4::zero()).mask();
|
|
|
|
if(m2 & 2) return;
|
|
|
|
m2 &= 1;
|
|
|
|
GSVector4 dxy01 = dv[0].p.xyxy(dv[1].p);
|
|
|
|
GSVector4 dx = dxy01.xzxy(dv[2].p);
|
|
GSVector4 dy = dxy01.ywyx(dv[2].p);
|
|
|
|
GSVector4 ddx[3];
|
|
|
|
ddx[0] = dx / dy;
|
|
ddx[1] = ddx[0].yxzw();
|
|
ddx[2] = ddx[0].xzyw();
|
|
|
|
// Precision is important here. Don't use reciprocal, it will break Jak3/Xenosaga1
|
|
GSVector8 _dxy01c(dxy01 / cross);
|
|
|
|
/*
|
|
dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww();
|
|
dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx();
|
|
*/
|
|
|
|
dscan.p = dv[1].p * _dxy01c.yyyy().extract<0>() - dv[0].p * _dxy01c.wwww().extract<0>();
|
|
dscan.tc = dv[1].tc * _dxy01c.yyyy() - dv[0].tc * _dxy01c.wwww();
|
|
|
|
dedge.p = dv[0].p * _dxy01c.zzzz().extract<0>() - dv[1].p * _dxy01c.xxxx().extract<0>();
|
|
dedge.tc = dv[0].tc * _dxy01c.zzzz() - dv[1].tc * _dxy01c.xxxx();
|
|
|
|
if(m1 & 1)
|
|
{
|
|
if(tb.y < tb.w)
|
|
{
|
|
edge = _v[i[1 - m2]];
|
|
|
|
edge.p = edge.p.insert32<0, 1>(vertex[i[m2]].p);
|
|
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
|
|
|
|
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(tb.x < tb.z)
|
|
{
|
|
edge = v0;
|
|
|
|
edge.p = edge.p.xxzw();
|
|
dedge.p = ddx[m2].xyzw(dedge.p);
|
|
|
|
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
|
|
}
|
|
|
|
if(tb.y < tb.w)
|
|
{
|
|
edge = v1;
|
|
|
|
edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
|
|
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
|
|
|
|
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
|
|
}
|
|
}
|
|
|
|
Flush(vertex, index, (GSVertexSW&)dscan);
|
|
|
|
if(m_ds->HasEdge())
|
|
{
|
|
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
|
|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
|
|
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
|
|
|
|
int orientation = a.mask();
|
|
int side = ((a | b) ^ c).mask() ^ 2; // evil
|
|
|
|
DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v1, (GSVertexSW&)dv[0], orientation & 1, side & 1);
|
|
DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v2, (GSVertexSW&)dv[1], orientation & 2, side & 2);
|
|
DrawEdge((GSVertexSW&)v1, (GSVertexSW&)v2, (GSVertexSW&)dv[2], orientation & 4, side & 4);
|
|
|
|
Flush(vertex, index, GSVertexSW::zero(), true);
|
|
}
|
|
}
|
|
|
|
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0)
|
|
{
|
|
ASSERT(top < bottom);
|
|
ASSERT(edge.p.x <= edge.p.y);
|
|
|
|
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
|
|
|
|
GSVector4 scissor = m_fscissor_x;
|
|
|
|
top = FindMyNextScanline(top);
|
|
|
|
while(top < bottom)
|
|
{
|
|
GSVector8 dy(GSVector4(top) - p0.yyyy());
|
|
|
|
GSVertexSW2 scan;
|
|
|
|
scan.p = edge.p + dedge.p * dy.extract<0>();
|
|
|
|
GSVector4 lrf = scan.p.ceil();
|
|
GSVector4 l = lrf.max(scissor);
|
|
GSVector4 r = lrf.min(scissor);
|
|
GSVector4i lr = GSVector4i(l.xxyy(r));
|
|
|
|
int left = lr.extract32<0>();
|
|
int right = lr.extract32<2>();
|
|
|
|
int pixels = right - left;
|
|
|
|
if(pixels > 0)
|
|
{
|
|
scan.tc = edge.tc + dedge.tc * dy;
|
|
|
|
GSVector8 prestep((l - p0).xxxx());
|
|
|
|
scan.p = scan.p + dscan.p * prestep.extract<0>();
|
|
scan.tc = scan.tc + dscan.tc * prestep;
|
|
|
|
AddScanline(e++, pixels, left, top, (GSVertexSW&)scan);
|
|
}
|
|
|
|
top++;
|
|
|
|
if(!IsOneOfMyScanlines(top))
|
|
{
|
|
top += (m_threads - 1) << m_thread_height;
|
|
}
|
|
}
|
|
|
|
m_edge.count += e - &m_edge.buff[m_edge.count];
|
|
}
|
|
|
|
#else
|
|
|
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
|
{
|
|
GSVertexSW dv[3];
|
|
GSVertexSW edge;
|
|
GSVertexSW dedge;
|
|
GSVertexSW dscan;
|
|
|
|
GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p);
|
|
GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx();
|
|
|
|
int m1 = (y0011 > y1221).mask() & 7;
|
|
|
|
int i[3];
|
|
|
|
i[0] = index[s_ysort[m1][0]];
|
|
i[1] = index[s_ysort[m1][1]];
|
|
i[2] = index[s_ysort[m1][2]];
|
|
|
|
const GSVertexSW& v0 = vertex[i[0]];
|
|
const GSVertexSW& v1 = vertex[i[1]];
|
|
const GSVertexSW& v2 = vertex[i[2]];
|
|
|
|
y0011 = v0.p.yyyy(v1.p);
|
|
y1221 = v1.p.yyyy(v2.p).xzzx();
|
|
|
|
m1 = (y0011 == y1221).mask() & 7;
|
|
|
|
// if(i == 0) => y0 < y1 < y2
|
|
// if(i == 1) => y0 == y1 < y2
|
|
// if(i == 4) => y0 < y1 == y2
|
|
|
|
if(m1 == 7) return; // y0 == y1 == y2
|
|
|
|
GSVector4 tbf = y0011.xzxz(y1221).ceil();
|
|
GSVector4 tbmax = tbf.max(m_fscissor_y);
|
|
GSVector4 tbmin = tbf.min(m_fscissor_y);
|
|
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b)
|
|
|
|
dv[0] = v1 - v0;
|
|
dv[1] = v2 - v0;
|
|
dv[2] = v2 - v1;
|
|
|
|
GSVector4 cross = dv[0].p * dv[1].p.yxwz();
|
|
|
|
cross = (cross - cross.yxwz()).yyyy(); // select the second component, the negated cross product
|
|
|
|
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
|
|
|
|
int m2 = cross.upl(cross == GSVector4::zero()).mask();
|
|
|
|
if(m2 & 2) return;
|
|
|
|
m2 &= 1;
|
|
|
|
GSVector4 dxy01 = dv[0].p.xyxy(dv[1].p);
|
|
|
|
GSVector4 dx = dxy01.xzxy(dv[2].p);
|
|
GSVector4 dy = dxy01.ywyx(dv[2].p);
|
|
|
|
GSVector4 ddx[3];
|
|
|
|
ddx[0] = dx / dy;
|
|
ddx[1] = ddx[0].yxzw();
|
|
ddx[2] = ddx[0].xzyw();
|
|
|
|
// Precision is important here. Don't use reciprocal, it will break Jak3/Xenosaga1
|
|
GSVector4 dxy01c = dxy01 / cross;
|
|
|
|
/*
|
|
dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww();
|
|
dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx();
|
|
*/
|
|
|
|
dscan.p = dv[1].p * dxy01c.yyyy() - dv[0].p * dxy01c.wwww();
|
|
dscan.t = dv[1].t * dxy01c.yyyy() - dv[0].t * dxy01c.wwww();
|
|
dscan.c = dv[1].c * dxy01c.yyyy() - dv[0].c * dxy01c.wwww();
|
|
|
|
dedge.p = dv[0].p * dxy01c.zzzz() - dv[1].p * dxy01c.xxxx();
|
|
dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx();
|
|
dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx();
|
|
|
|
if(m1 & 1)
|
|
{
|
|
if(tb.y < tb.w)
|
|
{
|
|
edge = vertex[i[1 - m2]];
|
|
|
|
edge.p = edge.p.insert32<0, 1>(vertex[i[m2]].p);
|
|
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
|
|
|
|
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(tb.x < tb.z)
|
|
{
|
|
edge = v0;
|
|
|
|
edge.p = edge.p.xxzw();
|
|
dedge.p = ddx[m2].xyzw(dedge.p);
|
|
|
|
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
|
|
}
|
|
|
|
if(tb.y < tb.w)
|
|
{
|
|
edge = v1;
|
|
|
|
edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
|
|
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
|
|
|
|
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
|
|
}
|
|
}
|
|
|
|
Flush(vertex, index, dscan);
|
|
|
|
if(m_ds->HasEdge())
|
|
{
|
|
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
|
|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
|
|
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
|
|
|
|
int orientation = a.mask();
|
|
int side = ((a | b) ^ c).mask() ^ 2; // evil
|
|
|
|
DrawEdge(v0, v1, dv[0], orientation & 1, side & 1);
|
|
DrawEdge(v0, v2, dv[1], orientation & 2, side & 2);
|
|
DrawEdge(v1, v2, dv[2], orientation & 4, side & 4);
|
|
|
|
Flush(vertex, index, GSVertexSW::zero(), true);
|
|
}
|
|
}
|
|
|
|
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0)
|
|
{
|
|
ASSERT(top < bottom);
|
|
ASSERT(edge.p.x <= edge.p.y);
|
|
|
|
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
|
|
|
|
GSVector4 scissor = m_fscissor_x;
|
|
|
|
top = FindMyNextScanline(top);
|
|
|
|
while(top < bottom)
|
|
{
|
|
GSVector4 dy = GSVector4(top) - p0.yyyy();
|
|
|
|
GSVertexSW scan;
|
|
|
|
scan.p = edge.p + dedge.p * dy;
|
|
|
|
GSVector4 lrf = scan.p.ceil();
|
|
GSVector4 l = lrf.max(scissor);
|
|
GSVector4 r = lrf.min(scissor);
|
|
GSVector4i lr = GSVector4i(l.xxyy(r));
|
|
|
|
int left = lr.extract32<0>();
|
|
int right = lr.extract32<2>();
|
|
|
|
int pixels = right - left;
|
|
|
|
if(pixels > 0)
|
|
{
|
|
scan.t = edge.t + dedge.t * dy;
|
|
scan.c = edge.c + dedge.c * dy;
|
|
|
|
GSVector4 prestep = (l - p0).xxxx();
|
|
|
|
scan.p = scan.p + dscan.p * prestep;
|
|
scan.t = scan.t + dscan.t * prestep;
|
|
scan.c = scan.c + dscan.c * prestep;
|
|
|
|
AddScanline(e++, pixels, left, top, scan);
|
|
}
|
|
|
|
top++;
|
|
|
|
if(!IsOneOfMyScanlines(top))
|
|
{
|
|
top += (m_threads - 1) << m_thread_height;
|
|
}
|
|
}
|
|
|
|
m_edge.count += e - &m_edge.buff[m_edge.count];
|
|
}
|
|
|
|
#endif
|
|
|
|
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
|
{
|
|
const GSVertexSW& v0 = vertex[index[0]];
|
|
const GSVertexSW& v1 = vertex[index[1]];
|
|
|
|
GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero());
|
|
|
|
GSVertexSW v[2];
|
|
|
|
v[0].p = v1.p.blend32(v0.p, mask);
|
|
v[0].t = v1.t.blend32(v0.t, mask);
|
|
v[0].c = v1.c;
|
|
|
|
v[1].p = v0.p.blend32(v1.p, mask);
|
|
v[1].t = v0.t.blend32(v1.t, mask);
|
|
|
|
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
|
|
|
|
r = r.rintersect(m_scissor);
|
|
|
|
if(r.rempty()) return;
|
|
|
|
GSVertexSW scan = v[0];
|
|
|
|
if(m_ds->IsSolidRect())
|
|
{
|
|
if(m_threads == 1)
|
|
{
|
|
m_ds->DrawRect(r, scan);
|
|
|
|
int pixels = r.width() * r.height();
|
|
|
|
m_pixels.actual += pixels;
|
|
m_pixels.total += pixels;
|
|
}
|
|
else
|
|
{
|
|
int top = FindMyNextScanline(r.top);
|
|
int bottom = r.bottom;
|
|
|
|
while(top < bottom)
|
|
{
|
|
r.top = top;
|
|
r.bottom = std::min<int>((top + (1 << m_thread_height)) & ~((1 << m_thread_height) - 1), bottom);
|
|
|
|
m_ds->DrawRect(r, scan);
|
|
|
|
int pixels = r.width() * r.height();
|
|
|
|
m_pixels.actual += pixels;
|
|
m_pixels.total += pixels;
|
|
|
|
top = r.bottom + ((m_threads - 1) << m_thread_height);
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
GSVertexSW dv = v[1] - v[0];
|
|
|
|
GSVector4 dt = dv.t / dv.p.xyxy();
|
|
|
|
GSVertexSW dedge;
|
|
GSVertexSW dscan;
|
|
|
|
dedge.t = GSVector4::zero().insert32<1, 1>(dt);
|
|
dscan.t = GSVector4::zero().insert32<0, 0>(dt);
|
|
|
|
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
|
|
|
|
int m = (prestep == GSVector4::zero()).mask();
|
|
|
|
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
|
|
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
|
|
|
|
m_ds->SetupPrim(vertex, index, dscan);
|
|
|
|
while(1)
|
|
{
|
|
if(IsOneOfMyScanlines(r.top))
|
|
{
|
|
DrawScanline(r.width(), r.left, r.top, scan);
|
|
}
|
|
|
|
if(++r.top >= r.bottom) break;
|
|
|
|
scan.t += dedge.t;
|
|
}
|
|
}
|
|
|
|
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
|
|
{
|
|
// orientation:
|
|
// - true: |dv.p.y| > |dv.p.x|
|
|
// - false |dv.p.x| > |dv.p.y|
|
|
// side:
|
|
// - true: top/left edge
|
|
// - false: bottom/right edge
|
|
|
|
// TODO: bit slow and too much duplicated code
|
|
// TODO: inner pre-step is still missing (hardly noticable)
|
|
// TODO: it does not always line up with the edge of the surrounded triangle
|
|
|
|
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
|
|
|
|
if(orientation)
|
|
{
|
|
GSVector4 tbf = v0.p.yyyy(v1.p).ceil(); // t t b b
|
|
GSVector4 tbmax = tbf.max(m_fscissor_y); // max(t, st) max(t, sb) max(b, st) max(b, sb)
|
|
GSVector4 tbmin = tbf.min(m_fscissor_y); // min(t, st) min(t, sb) min(b, st) min(b, sb)
|
|
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(t, st) max(b, sb) min(t, st) min(b, sb)
|
|
|
|
int top, bottom;
|
|
|
|
GSVertexSW edge, dedge;
|
|
|
|
if((dv.p >= GSVector4::zero()).mask() & 2)
|
|
{
|
|
top = tb.extract32<0>(); // max(t, st)
|
|
bottom = tb.extract32<3>(); // min(b, sb)
|
|
|
|
if(top >= bottom) return;
|
|
|
|
edge = v0;
|
|
dedge = dv / dv.p.yyyy();
|
|
|
|
edge += dedge * (tbmax.xxxx() - edge.p.yyyy());
|
|
}
|
|
else
|
|
{
|
|
top = tb.extract32<1>(); // max(b, st)
|
|
bottom = tb.extract32<2>(); // min(t, sb)
|
|
|
|
if(top >= bottom) return;
|
|
|
|
edge = v1;
|
|
dedge = dv / dv.p.yyyy();
|
|
|
|
edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
|
|
}
|
|
|
|
GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);
|
|
|
|
int x = p.extract32<0>();
|
|
int dx = p.extract32<1>();
|
|
|
|
if(side)
|
|
{
|
|
while(1)
|
|
{
|
|
int xi = x >> 16;
|
|
int xf = x & 0xffff;
|
|
|
|
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top))
|
|
{
|
|
AddScanline(e, 1, xi, top, edge);
|
|
|
|
e->t.u32[3] = (0x10000 - xf) & 0xffff;
|
|
|
|
e++;
|
|
}
|
|
|
|
if(++top >= bottom) break;
|
|
|
|
edge += dedge;
|
|
x += dx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while(1)
|
|
{
|
|
int xi = (x >> 16) + 1;
|
|
int xf = x & 0xffff;
|
|
|
|
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top))
|
|
{
|
|
AddScanline(e, 1, xi, top, edge);
|
|
|
|
e->t.u32[3] = xf;
|
|
|
|
e++;
|
|
}
|
|
|
|
if(++top >= bottom) break;
|
|
|
|
edge += dedge;
|
|
x += dx;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
GSVector4 lrf = v0.p.xxxx(v1.p).ceil(); // l l r r
|
|
GSVector4 lrmax = lrf.max(m_fscissor_x); // max(l, sl) max(l, sr) max(r, sl) max(r, sr)
|
|
GSVector4 lrmin = lrf.min(m_fscissor_x); // min(l, sl) min(l, sr) min(r, sl) min(r, sr)
|
|
GSVector4i lr = GSVector4i(lrmax.xzyw(lrmin)); // max(l, sl) max(r, sl) min(l, sr) min(r, sr)
|
|
|
|
int left, right;
|
|
|
|
GSVertexSW edge, dedge;
|
|
|
|
if((dv.p >= GSVector4::zero()).mask() & 1)
|
|
{
|
|
left = lr.extract32<0>(); // max(l, sl)
|
|
right = lr.extract32<3>(); // min(r, sr)
|
|
|
|
if(left >= right) return;
|
|
|
|
edge = v0;
|
|
dedge = dv / dv.p.xxxx();
|
|
|
|
edge += dedge * (lrmax.xxxx() - edge.p.xxxx());
|
|
}
|
|
else
|
|
{
|
|
left = lr.extract32<1>(); // max(r, sl)
|
|
right = lr.extract32<2>(); // min(l, sr)
|
|
|
|
if(left >= right) return;
|
|
|
|
edge = v1;
|
|
dedge = dv / dv.p.xxxx();
|
|
|
|
edge += dedge * (lrmax.zzzz() - edge.p.xxxx());
|
|
}
|
|
|
|
GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);
|
|
|
|
int y = p.extract32<2>();
|
|
int dy = p.extract32<3>();
|
|
|
|
if(side)
|
|
{
|
|
while(1)
|
|
{
|
|
int yi = y >> 16;
|
|
int yf = y & 0xffff;
|
|
|
|
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
|
|
{
|
|
AddScanline(e, 1, left, yi, edge);
|
|
|
|
e->t.u32[3] = (0x10000 - yf) & 0xffff;
|
|
|
|
e++;
|
|
}
|
|
|
|
if(++left >= right) break;
|
|
|
|
edge += dedge;
|
|
y += dy;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while(1)
|
|
{
|
|
int yi = (y >> 16) + 1;
|
|
int yf = y & 0xffff;
|
|
|
|
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
|
|
{
|
|
AddScanline(e, 1, left, yi, edge);
|
|
|
|
e->t.u32[3] = yf;
|
|
|
|
e++;
|
|
}
|
|
|
|
if(++left >= right) break;
|
|
|
|
edge += dedge;
|
|
y += dy;
|
|
}
|
|
}
|
|
}
|
|
|
|
m_edge.count += e - &m_edge.buff[m_edge.count];
|
|
}
|
|
|
|
void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan)
|
|
{
|
|
*e = scan;
|
|
|
|
e->_pad.i32[0] = pixels;
|
|
e->_pad.i32[1] = left;
|
|
e->_pad.i32[2] = top;
|
|
}
|
|
|
|
void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge)
|
|
{
|
|
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
|
|
|
int count = m_edge.count;
|
|
|
|
if(count > 0)
|
|
{
|
|
m_ds->SetupPrim(vertex, index, dscan);
|
|
|
|
const GSVertexSW* RESTRICT e = m_edge.buff;
|
|
const GSVertexSW* RESTRICT ee = e + count;
|
|
|
|
if(!edge)
|
|
{
|
|
do
|
|
{
|
|
int pixels = e->_pad.i32[0];
|
|
int left = e->_pad.i32[1];
|
|
int top = e->_pad.i32[2];
|
|
|
|
DrawScanline(pixels, left, top, *e++);
|
|
}
|
|
while(e < ee);
|
|
}
|
|
else
|
|
{
|
|
do
|
|
{
|
|
int pixels = e->_pad.i32[0];
|
|
int left = e->_pad.i32[1];
|
|
int top = e->_pad.i32[2];
|
|
|
|
DrawEdge(pixels, left, top, *e++);
|
|
}
|
|
while(e < ee);
|
|
}
|
|
|
|
m_edge.count = 0;
|
|
}
|
|
}
|
|
|
|
#if _M_SSE >= 0x501
|
|
#define PIXELS_PER_LOOP 8
|
|
#else
|
|
#define PIXELS_PER_LOOP 4
|
|
#endif
|
|
|
|
void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
|
|
{
|
|
m_pixels.actual += pixels;
|
|
m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - (left & (PIXELS_PER_LOOP - 1));
|
|
//m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - left;
|
|
|
|
ASSERT(m_pixels.actual <= m_pixels.total);
|
|
|
|
m_ds->DrawScanline(pixels, left, top, scan);
|
|
}
|
|
|
|
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
|
{
|
|
m_pixels.actual += 1;
|
|
m_pixels.total += PIXELS_PER_LOOP - 1;
|
|
|
|
ASSERT(m_pixels.actual <= m_pixels.total);
|
|
|
|
m_ds->DrawEdge(pixels, left, top, scan);
|
|
}
|
|
|
|
//
|
|
|
|
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
|
: m_perfmon(perfmon)
|
|
{
|
|
m_thread_height = compute_best_thread_height(threads);
|
|
|
|
int rows = (2048 >> m_thread_height) + 16;
|
|
m_scanline = (uint8*)_aligned_malloc(rows, 64);
|
|
|
|
int row = 0;
|
|
|
|
while(row < rows)
|
|
{
|
|
for(int i = 0; i < threads; i++, row++)
|
|
{
|
|
m_scanline[row] = (uint8)i;
|
|
}
|
|
}
|
|
}
|
|
|
|
GSRasterizerList::~GSRasterizerList()
|
|
{
|
|
_aligned_free(m_scanline);
|
|
}
|
|
|
|
void GSRasterizerList::Queue(const std::shared_ptr<GSRasterizerData>& data)
|
|
{
|
|
GSVector4i r = data->bbox.rintersect(data->scissor);
|
|
|
|
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);
|
|
|
|
int top = r.top >> m_thread_height;
|
|
int bottom = std::min<int>((r.bottom + (1 << m_thread_height) - 1) >> m_thread_height, top + m_workers.size());
|
|
|
|
while(top < bottom)
|
|
{
|
|
m_workers[m_scanline[top++]]->Push(data);
|
|
}
|
|
}
|
|
|
|
void GSRasterizerList::Sync()
|
|
{
|
|
if(!IsSynced())
|
|
{
|
|
for(size_t i = 0; i < m_workers.size(); i++)
|
|
{
|
|
m_workers[i]->Wait();
|
|
}
|
|
|
|
m_perfmon->Put(GSPerfMon::SyncPoint, 1);
|
|
}
|
|
}
|
|
|
|
bool GSRasterizerList::IsSynced() const
|
|
{
|
|
for(size_t i = 0; i < m_workers.size(); i++)
|
|
{
|
|
if(!m_workers[i]->IsEmpty())
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int GSRasterizerList::GetPixels(bool reset)
|
|
{
|
|
int pixels = 0;
|
|
|
|
for(size_t i = 0; i < m_workers.size(); i++)
|
|
{
|
|
pixels += m_r[i]->GetPixels(reset);
|
|
}
|
|
|
|
return pixels;
|
|
}
|