[spline/bezier]Move bezier patch loops into the function and pre-convert control-points.

This commit is contained in:
xebra 2018-01-31 18:39:43 +09:00
parent d0682d7829
commit a340390996
2 changed files with 101 additions and 120 deletions

View file

@ -72,28 +72,13 @@ inline float bern2deriv(float x) { return 3 * (2 - 3 * x) * x; }
inline float bern3deriv(float x) { return 3 * x * x; }
// http://en.wikipedia.org/wiki/Bernstein_polynomial
static Vec2f Bernstein3D(const Vec2f& p0, const Vec2f& p1, const Vec2f& p2, const Vec2f& p3, float x) {
template<class T>
static T Bernstein3D(const T& p0, const T& p1, const T& p2, const T& p3, float x) {
if (x == 0) return p0;
else if (x == 1) return p3;
if (x == 1) return p3;
return p0 * bern0(x) + p1 * bern1(x) + p2 * bern2(x) + p3 * bern3(x);
}
static Vec3f Bernstein3D(const Vec3f& p0, const Vec3f& p1, const Vec3f& p2, const Vec3f& p3, float x) {
if (x == 0) return p0;
else if (x == 1) return p3;
return p0 * bern0(x) + p1 * bern1(x) + p2 * bern2(x) + p3 * bern3(x);
}
static Vec4f Bernstein3D(const Vec4f& p0, const Vec4f& p1, const Vec4f& p2, const Vec4f& p3, float x) {
if (x == 0) return p0;
else if (x == 1) return p3;
return p0 * bern0(x) + p1 * bern1(x) + p2 * bern2(x) + p3 * bern3(x);
}
static Vec4f Bernstein3D(const u32& p0, const u32& p1, const u32& p2, const u32& p3, float x) {
return Bernstein3D(Vec4f::FromRGBA(p0), Vec4f::FromRGBA(p1), Vec4f::FromRGBA(p2), Vec4f::FromRGBA(p3), x);
}
static Vec3f Bernstein3DDerivative(const Vec3f& p0, const Vec3f& p1, const Vec3f& p2, const Vec3f& p3, float x) {
return p0 * bern0deriv(x) + p1 * bern1deriv(x) + p2 * bern2deriv(x) + p3 * bern3deriv(x);
}
@ -485,36 +470,48 @@ static void _BezierPatchHighQuality(u8 *&dest, u16 *&indices, int &count, int te
const bool sampleColors = (origVertType & GE_VTYPE_COL_MASK) != 0;
const bool sampleTexcoords = (origVertType & GE_VTYPE_TC_MASK) != 0;
int num_patches_u = (patch.count_u - 1) / 3;
int num_patches_v = (patch.count_v - 1) / 3;
for (int patch_u = 0; patch_u < num_patches_u; ++patch_u) {
for (int patch_v = 0; patch_v < num_patches_v; ++patch_v) {
// Precompute the horizontal curves to we only have to evaluate the vertical ones.
Vec3f *_pos[16];
Vec4f *_col[16];
Vec2f *_tex[16];
for (int point = 0; point < 16; ++point) {
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * patch.count_u;
_pos[point] = &patch.pos[idx];
_col[point] = &patch.col[idx];
_tex[point] = &patch.tex[idx];
}
for (int i = 0; i < tess_u + 1; i++) {
float u = ((float)i / (float)tess_u);
prepos.horiz1[i] = Bernstein3D(patch.points[0]->pos, patch.points[1]->pos, patch.points[2]->pos, patch.points[3]->pos, u);
prepos.horiz2[i] = Bernstein3D(patch.points[4]->pos, patch.points[5]->pos, patch.points[6]->pos, patch.points[7]->pos, u);
prepos.horiz3[i] = Bernstein3D(patch.points[8]->pos, patch.points[9]->pos, patch.points[10]->pos, patch.points[11]->pos, u);
prepos.horiz4[i] = Bernstein3D(patch.points[12]->pos, patch.points[13]->pos, patch.points[14]->pos, patch.points[15]->pos, u);
prepos.horiz1[i] = Bernstein3D(*_pos[0], *_pos[1], *_pos[2], *_pos[3], u);
prepos.horiz2[i] = Bernstein3D(*_pos[4], *_pos[5], *_pos[6], *_pos[7], u);
prepos.horiz3[i] = Bernstein3D(*_pos[8], *_pos[9], *_pos[10], *_pos[11], u);
prepos.horiz4[i] = Bernstein3D(*_pos[12], *_pos[13], *_pos[14], *_pos[15], u);
if (sampleColors) {
precol.horiz1[i] = Bernstein3D(patch.points[0]->color_32, patch.points[1]->color_32, patch.points[2]->color_32, patch.points[3]->color_32, u);
precol.horiz2[i] = Bernstein3D(patch.points[4]->color_32, patch.points[5]->color_32, patch.points[6]->color_32, patch.points[7]->color_32, u);
precol.horiz3[i] = Bernstein3D(patch.points[8]->color_32, patch.points[9]->color_32, patch.points[10]->color_32, patch.points[11]->color_32, u);
precol.horiz4[i] = Bernstein3D(patch.points[12]->color_32, patch.points[13]->color_32, patch.points[14]->color_32, patch.points[15]->color_32, u);
precol.horiz1[i] = Bernstein3D(*_col[0], *_col[1], *_col[2], *_col[3], u);
precol.horiz2[i] = Bernstein3D(*_col[4], *_col[5], *_col[6], *_col[7], u);
precol.horiz3[i] = Bernstein3D(*_col[8], *_col[9], *_col[10], *_col[11], u);
precol.horiz4[i] = Bernstein3D(*_col[12], *_col[13], *_col[14], *_col[15], u);
}
if (sampleTexcoords) {
pretex.horiz1[i] = Bernstein3D(Vec2f(patch.points[0]->uv), Vec2f(patch.points[1]->uv), Vec2f(patch.points[2]->uv), Vec2f(patch.points[3]->uv), u);
pretex.horiz2[i] = Bernstein3D(Vec2f(patch.points[4]->uv), Vec2f(patch.points[5]->uv), Vec2f(patch.points[6]->uv), Vec2f(patch.points[7]->uv), u);
pretex.horiz3[i] = Bernstein3D(Vec2f(patch.points[8]->uv), Vec2f(patch.points[9]->uv), Vec2f(patch.points[10]->uv), Vec2f(patch.points[11]->uv), u);
pretex.horiz4[i] = Bernstein3D(Vec2f(patch.points[12]->uv), Vec2f(patch.points[13]->uv), Vec2f(patch.points[14]->uv), Vec2f(patch.points[15]->uv), u);
pretex.horiz1[i] = Bernstein3D(*_tex[0], *_tex[1], *_tex[2], *_tex[3], u);
pretex.horiz2[i] = Bernstein3D(*_tex[4], *_tex[5], *_tex[6], *_tex[7], u);
pretex.horiz3[i] = Bernstein3D(*_tex[8], *_tex[9], *_tex[10], *_tex[11], u);
pretex.horiz4[i] = Bernstein3D(*_tex[12], *_tex[13], *_tex[14], *_tex[15], u);
}
if (computeNormals) {
prederivU.horiz1[i] = Bernstein3DDerivative(patch.points[0]->pos, patch.points[1]->pos, patch.points[2]->pos, patch.points[3]->pos, u);
prederivU.horiz2[i] = Bernstein3DDerivative(patch.points[4]->pos, patch.points[5]->pos, patch.points[6]->pos, patch.points[7]->pos, u);
prederivU.horiz3[i] = Bernstein3DDerivative(patch.points[8]->pos, patch.points[9]->pos, patch.points[10]->pos, patch.points[11]->pos, u);
prederivU.horiz4[i] = Bernstein3DDerivative(patch.points[12]->pos, patch.points[13]->pos, patch.points[14]->pos, patch.points[15]->pos, u);
prederivU.horiz1[i] = Bernstein3DDerivative(*_pos[0], *_pos[1], *_pos[2], *_pos[3], u);
prederivU.horiz2[i] = Bernstein3DDerivative(*_pos[4], *_pos[5], *_pos[6], *_pos[7], u);
prederivU.horiz3[i] = Bernstein3DDerivative(*_pos[8], *_pos[9], *_pos[10], *_pos[11], u);
prederivU.horiz4[i] = Bernstein3DDerivative(*_pos[12], *_pos[13], *_pos[14], *_pos[15], u);
}
}
for (int tile_v = 0; tile_v < tess_v + 1; ++tile_v) {
for (int tile_u = 0; tile_u < tess_u + 1; ++tile_u) {
float u = ((float)tile_u / (float)tess_u);
@ -538,8 +535,8 @@ static void _BezierPatchHighQuality(u8 *&dest, u16 *&indices, int &count, int te
if (!sampleTexcoords) {
// Generate texcoord
vert.uv[0] = u + patch.u_index * third;
vert.uv[1] = v + patch.v_index * third;
vert.uv[0] = u + patch_u * third;
vert.uv[1] = v + patch_u * third;
} else {
// Sample UV from control points
const Vec2f res = pretex.Bernstein3D(tile_u, bv);
@ -550,26 +547,18 @@ static void _BezierPatchHighQuality(u8 *&dest, u16 *&indices, int &count, int te
if (sampleColors) {
vert.color_32 = precol.Bernstein3D(tile_u, bv).ToRGBA();
} else {
memcpy(vert.color, patch.points[0]->color, 4);
vert.color_32 = patch.defcolor;
}
}
}
GEPatchPrimType prim_type = patch.primType;
// Combine the vertices into triangles.
for (int tile_v = 0; tile_v < tess_v; ++tile_v) {
for (int tile_u = 0; tile_u < tess_u; ++tile_u) {
int total = patch.index * (tess_u + 1) * (tess_v + 1);
int idx0 = total + tile_v * (tess_u + 1) + tile_u;
int idx1 = total + tile_v * (tess_u + 1) + tile_u + 1;
int idx2 = total + (tile_v + 1) * (tess_u + 1) + tile_u;
int idx3 = total + (tile_v + 1) * (tess_u + 1) + tile_u + 1;
int patch_index = patch_v * num_patches_u + patch_u;
int total = patch_index * (tess_u + 1) * (tess_v + 1);
BuildIndex(indices + count, count, tess_u, tess_v, patch.primType, total);
CopyQuadIndex(indices, prim_type, idx0, idx1, idx2, idx3);
count += 6;
}
}
dest += (tess_u + 1) * (tess_v + 1) * sizeof(SimpleVertex);
}
}
}
// Prepare mesh of one patch for "Instanced Tessellation".
@ -796,21 +785,20 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
TessellateBezierPatchHardware(dest, inds, count, tess_u, tess_v, prim_type);
numPatches = num_patches_u * num_patches_v;
} else {
BezierPatch *patches = (BezierPatch *)managedBuf.Allocate(sizeof(BezierPatch) * num_patches_u * num_patches_v);
for (int patch_u = 0; patch_u < num_patches_u; patch_u++) {
for (int patch_v = 0; patch_v < num_patches_v; patch_v++) {
BezierPatch& patch = patches[patch_u + patch_v * num_patches_u];
for (int point = 0; point < 16; ++point) {
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u;
patch.points[point] = points[idx];
}
patch.u_index = patch_u * 3;
patch.v_index = patch_v * 3;
patch.index = patch_v * num_patches_u + patch_u;
BezierPatch patch;
patch.count_u = count_u;
patch.count_v = count_v;
patch.primType = prim_type;
patch.computeNormals = computeNormals;
patch.patchFacing = patchFacing;
}
patch.defcolor = points[0]->color_32;
patch.pos = (Vec3f *)managedBuf.Allocate(sizeof(Vec3f) * count_u * count_v);
patch.tex = (Vec2f *)managedBuf.Allocate(sizeof(Vec2f) * count_u * count_v);
patch.col = (Vec4f *)managedBuf.Allocate(sizeof(Vec4f) * count_u * count_v);
for (int idx = 0; idx < count_u * count_v; idx++) {
patch.pos[idx] = Vec3f(points[idx]->pos);
patch.tex[idx] = Vec2f(points[idx]->uv);
patch.col[idx] = Vec4f::FromRGBA(points[idx]->color_32);
}
int maxVertices = SPLINE_BUFFER_SIZE / vertexSize;
// Downsample until it fits, in case crazy tessellation factors are sent.
@ -818,13 +806,8 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
tess_u /= 2;
tess_v /= 2;
}
// We shouldn't really split up into separate 4x4 patches, instead we should do something that works
// like the splines, so we subdivide across the whole "mega-patch".
for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) {
const BezierPatch &patch = patches[patch_idx];
TessellateBezierPatch(dest, inds, count, tess_u, tess_v, patch, origVertType);
}
}
u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;

View file

@ -39,11 +39,9 @@ struct BezierPatch {
Vec3f *pos;
Vec4f *col;
Vec2f *tex;
// These are used to generate UVs.
int u_index, v_index;
int index;
u32_le defcolor;
int count_u;
int count_v;
GEPatchPrimType primType;
bool computeNormals;
bool patchFacing;