Start of mixed transformation

This commit is contained in:
Ced2911 2013-08-20 18:47:11 +02:00
parent 0e473a7909
commit f05e1dbf5a
6 changed files with 680 additions and 144 deletions

View file

@ -199,7 +199,7 @@ void GenerateFragmentShader(char *buffer) {
WRITE(p, " { ");
WRITE(p, " float4 v_texcoord: TEXCOORD0; ");
WRITE(p, " float4 v_color0: COLOR0; ");
WRITE(p, " float4 v_color1: COLOR1; ");
WRITE(p, " float3 v_color1: COLOR1; ");
if (enableFog) {
WRITE(p, "float v_fogdepth:FOG;\n");
}

View file

@ -406,7 +406,8 @@ void LinkedShader::updateUniforms() {
ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, bonetemp);
//glUniformMatrix4fv(u_bone[i], 1, GL_FALSE, bonetemp);
//m_vs->constant->SetMatrix(pD3Ddevice, u_bone[i], (D3DXMATRIX*)bonetemp);
if (u_bone[i] != 0)
m_vs->constant->SetMatrix(pD3Ddevice, u_bone[i], (D3DXMATRIX*)bonetemp);
}
}
#endif
@ -427,7 +428,6 @@ void LinkedShader::updateUniforms() {
if (u_matspecular != 0 && (dirtyUniforms & DIRTY_MATSPECULAR)) {
SetColorUniform3ExtraFloat(m_vs->constant,u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
}
/*
for (int i = 0; i < 4; i++) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) {
@ -441,20 +441,33 @@ void LinkedShader::updateUniforms() {
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
if (u_lightpos[i] != -1) glUniform3fv(u_lightpos[i], 1, vec);
if (u_lightpos[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightpos[i], vec, 3);
} else {
if (u_lightpos[i] != -1) glUniform3fv(u_lightpos[i], 1, gstate_c.lightpos[i]);
if (u_lightpos[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightpos[i], gstate_c.lightpos[i], 3);
}
if (u_lightdir[i] != -1) glUniform3fv(u_lightdir[i], 1, gstate_c.lightdir[i]);
if (u_lightatt[i] != -1) glUniform3fv(u_lightatt[i], 1, gstate_c.lightatt[i]);
if (u_lightangle[i] != -1) glUniform1f(u_lightangle[i], gstate_c.lightangle[i]);
if (u_lightspotCoef[i] != -1) glUniform1f(u_lightspotCoef[i], gstate_c.lightspotCoef[i]);
if (u_lightambient[i] != -1) glUniform3fv(u_lightambient[i], 1, gstate_c.lightColor[0][i]);
if (u_lightdiffuse[i] != -1) glUniform3fv(u_lightdiffuse[i], 1, gstate_c.lightColor[1][i]);
if (u_lightspecular[i] != -1) glUniform3fv(u_lightspecular[i], 1, gstate_c.lightColor[2][i]);
if (u_lightdir[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightdir[i], gstate_c.lightdir[i], 3);
if (u_lightatt[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightatt[i], gstate_c.lightatt[i], 3);
if (u_lightangle[i] != 0)
m_vs->constant->SetFloat(pD3Ddevice, u_lightangle[i], gstate_c.lightangle[i]);
if (u_lightspotCoef[i] != 0)
m_vs->constant->SetFloat(pD3Ddevice, u_lightspotCoef[i], gstate_c.lightspotCoef[i]);
if (u_lightambient[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightambient[i], gstate_c.lightColor[0][i], 3);
if (u_lightdiffuse[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightdiffuse[i], gstate_c.lightColor[1][i], 3);
if (u_lightspecular[i] != 0)
m_vs->constant->SetFloatArray(pD3Ddevice, u_lightspecular[i], gstate_c.lightColor[2][i], 3);
}
}
*/
dirtyUniforms = 0;
}

View file

@ -266,7 +266,7 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
float renderWidthFactor, renderHeightFactor;
float renderWidth, renderHeight;
float renderX, renderY;
bool useBufferedRendering = g_Config.iRenderingMode != 0 ? 1 : 0;
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
if (useBufferedRendering) {
renderX = 0.0f;
renderY = 0.0f;
@ -286,10 +286,10 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0;
// Scissor
int scissorX1 = (gstate.getScissorX1());
int scissorY1 = (gstate.getScissorY1());
int scissorX2 = (gstate.getScissorX2());
int scissorY2 = (gstate.getScissorY2());
int scissorX1 = gstate.getScissorX1();
int scissorY1 = gstate.getScissorY1();
int scissorX2 = gstate.getScissorX2();
int scissorY2 = gstate.getScissorY2();
// This is a bit of a hack as the render buffer isn't always that size
if (scissorX1 == 0 && scissorY1 == 0

View file

@ -27,6 +27,7 @@
#include "helper/dx_state.h"
#include "native/ext/cityhash/city.h"
#include "ext/xxhash.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
@ -39,6 +40,50 @@
#include "ShaderManager.h"
#include "DisplayListInterpreter.h"
IDirect3DVertexDeclaration9* pMixedVertexDecl = NULL;
#pragma pack(push, 1)
// CPU-side vertex layout used by the mixed transform path. The byte offsets below
// must stay in sync with the element table in CreateVertexDeclaration(). Every
// member is a float, so the struct is 18 floats (72 bytes with 4-byte floats).
struct MixedVertexFormat {
// 3 floats at byte offset 0
float position[3];
// 1 float at byte offset 12; the vertex declaration reads position + fog
// together as a single FLOAT4 POSITION element
float fog;
// 3 floats at byte offset 16 (TEXCOORD 0)
float texcoord[3];
// 3 floats at byte offset 28 (NORMAL 0)
float normal[3];
// 4 floats each: the vertex declaration describes these as D3DDECLTYPE_FLOAT4,
// not as packed D3DDECLTYPE_D3DCOLOR values
float color0[4]; // byte offset 40 (COLOR 0)
float color1[4]; // byte offset 56 (COLOR 1)
};
#pragma pack(pop)
static MixedVertexFormat * mixedVertices = NULL;
static MixedVertexFormat * mixedVertices_ = NULL;
static u16 * mixedIndices;
static u16 * mixedIndices_;
static void CreateVertexDeclaration() {
const D3DVERTEXELEMENT9 vertexElements[] =
{
//{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
//{ 0, 12, D3DDECLTYPE_FLOAT1, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_FOG, 0 },
{ 0, 0, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 }, // merge fog
{ 0, 16, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
{ 0, 28, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL, 0 },
{ 0, 40, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 },
{ 0, 56, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 1 },
D3DDECL_END()
};
pD3Ddevice->CreateVertexDeclaration( vertexElements, &pMixedVertexDecl );
}
const D3DPRIMITIVETYPE glprim[8] = {
D3DPT_POINTLIST,
D3DPT_LINELIST,
@ -56,9 +101,13 @@ int D3DPrimCount(D3DPRIMITIVETYPE prim, int size) {
enum {
DECODED_VERTEX_BUFFER_SIZE = 65536 * 48,
DECODED_INDEX_BUFFER_SIZE = 65536 * 20,
TRANSFORMED_VERTEX_BUFFER_SIZE = 65536 * sizeof(TransformedVertex)
TRANSFORMED_VERTEX_BUFFER_SIZE = 65536 * sizeof(TransformedVertex),
MIXED_VERTEX_BUFFER_SIZE = 65536 * sizeof(MixedVertexFormat)
};
#define VERTEXCACHE_DECIMATION_INTERVAL 17
// Limits `in` to the closed range [min, max]: values below `min` yield `min`,
// values above `max` yield `max`, anything else is returned unchanged.
inline float clamp(float in, float min, float max) {
	if (in < min)
		return min;
	if (in > max)
		return max;
	return in;
}
@ -74,6 +123,7 @@ TransformDrawEngine::TransformDrawEngine()
framebufferManager_(0),
numDrawCalls(0),
uvScale(0) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
@ -81,10 +131,15 @@ TransformDrawEngine::TransformDrawEngine()
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE);
transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE);
transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
mixedVertices = (MixedVertexFormat *)AllocateMemoryPages(MIXED_VERTEX_BUFFER_SIZE);
// memset(vbo_, 0, sizeof(vbo_));
// memset(ebo_, 0, sizeof(ebo_));
if (g_Config.bPrescaleUV) {
uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS];
}
indexGen.Setup(decIndex);
InitDeviceObjects();
CreateVertexDeclaration();
//register_gl_resource_holder(this);
}
@ -94,40 +149,28 @@ TransformDrawEngine::~TransformDrawEngine() {
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(mixedVertices, MIXED_VERTEX_BUFFER_SIZE);
//unregister_gl_resource_holder(this);
for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); iter++) {
delete iter->second;
}
delete [] uvScale;
pMixedVertexDecl->Release();
}
void TransformDrawEngine::InitDeviceObjects() {
for(size_t i = 0;i < NUM_VBOS; i++) {
pD3Ddevice->CreateVertexBuffer(DECODED_VERTEX_BUFFER_SIZE, NULL, NULL, D3DPOOL_DEFAULT, &vbo_[i], NULL);
pD3Ddevice->CreateIndexBuffer(DECODED_INDEX_BUFFER_SIZE, NULL, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &ebo_[i], NULL);
for (int i = 0; i < NUM_VBOS; i++) {
pD3Ddevice->CreateVertexBuffer(MIXED_VERTEX_BUFFER_SIZE, NULL, NULL, D3DPOOL_DEFAULT, &vb[i], NULL);
pD3Ddevice->CreateIndexBuffer(MIXED_VERTEX_BUFFER_SIZE, NULL, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &ib[i], NULL);
}
/*
if (!vbo_[0]) {
glGenBuffers(NUM_VBOS, &vbo_[0]);
glGenBuffers(NUM_VBOS, &ebo_[0]);
} else {
ERROR_LOG(G3D, "Device objects already initialized!");
}
*/
}
void TransformDrawEngine::DestroyDeviceObjects() {
/*
glDeleteBuffers(NUM_VBOS, &vbo_[0]);
glDeleteBuffers(NUM_VBOS, &ebo_[0]);
*/
for(size_t i = 0;i < NUM_VBOS; i++) {
vbo_[i]->Release();
ebo_[i]->Release();
for (int i = 0; i < NUM_VBOS; i++) {
vb[i]->Release();
ib[i]->Release();
}
memset(vbo_, 0, sizeof(vbo_));
memset(ebo_, 0, sizeof(ebo_));
ClearTrackedVertexArrays();
}
/*
@ -238,7 +281,6 @@ public:
void Light(float colorOut0[4], float colorOut1[4], const float colorIn[4], Vec3f pos, Vec3f normal);
private:
bool disabled_;
Color4 globalAmbient;
Color4 materialEmissive;
Color4 materialAmbient;
@ -543,8 +585,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
vscale /= gstate_c.curTextureHeight;
}
int w = 1 << (gstate.texsize[0] & 0xf);
int h = 1 << ((gstate.texsize[0] >> 8) & 0xf);
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
float widthFactor = (float) w / (float) gstate_c.curTextureWidth;
float heightFactor = (float) h / (float) gstate_c.curTextureHeight;
@ -847,6 +889,129 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
}
}
// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
void TransformDrawEngine::MixedTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertType, void *inds,
int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, LPDIRECT3DVERTEXBUFFER9 vb_, LPDIRECT3DINDEXBUFFER9 ib_
) {
#if 0
// Get vertices/indices pointer
vb_->Lock(0, vertexCount * sizeof(MixedVertexFormat), (void**)&mixedVertices, D3DLOCK_NOOVERWRITE);
ib_->Lock(0, maxIndex * sizeof(short), (void**)&mixedIndices, D3DLOCK_NOOVERWRITE);
#endif
MixedVertexFormat * _mixedVertices = mixedVertices;
VertexReader reader(decoded, decVtxFormat, vertType);
for (int index = 0; index < maxIndex; index++) {
reader.Goto(index);
_mixedVertices->fog = 1.0f;
reader.ReadPos(_mixedVertices->position);
if (reader.hasUV())
reader.ReadUV(_mixedVertices->texcoord);
if (reader.hasNormal())
reader.ReadNrm(_mixedVertices->normal);
if (reader.hasColor0())
reader.ReadColor0(_mixedVertices->color0);
if (reader.hasColor1())
reader.ReadColor0(_mixedVertices->color1);
_mixedVertices++;
}
// Step 2: expand rectangles.
int numTrans = 0;
bool drawIndexed = true;
numTrans = vertexCount;
/*
if (prim != GE_PRIM_RECTANGLES) {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount;
drawIndexed = true;
} else {
numTrans = 0;
drawBuffer = transformedExpanded;
TransformedVertex *trans = &transformedExpanded[0];
TransformedVertex saved;
for (int i = 0; i < vertexCount; i += 2) {
int index = ((const u16*)inds)[i];
saved = transformed[index];
int index2 = ((const u16*)inds)[i + 1];
TransformedVertex &transVtx = transformed[index2];
// We have to turn the rectangle into two triangles, so 6 points. Sigh.
// bottom right
trans[0] = transVtx;
// bottom left
trans[1] = transVtx;
trans[1].y = saved.y;
trans[1].v = saved.v;
// top left
trans[2] = transVtx;
trans[2].x = saved.x;
trans[2].y = saved.y;
trans[2].u = saved.u;
trans[2].v = saved.v;
// top right
trans[3] = transVtx;
trans[3].x = saved.x;
trans[3].u = saved.u;
// That's the four corners. Now process UV rotation.
if (throughmode)
RotateUVThrough(trans);
// Apparently, non-through RotateUV just breaks things.
// If we find a game where it helps, we'll just have to figure out how they differ.
// Possibly, it has something to do with flipped viewport Y axis, which a few games use.
// else
// RotateUV(trans);
// bottom right
trans[4] = trans[0];
// top left
trans[5] = trans[2];
trans += 6;
numTrans += 6;
}
}
*/
// TODO: Add a post-transform cache here for multi-RECTANGLES only.
// Might help for text drawing.
pD3Ddevice->SetVertexDeclaration(pMixedVertexDecl);
/// Debug !!
//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
#if 0
vb_->Unlock();
ib_->Unlock();
#endif
if (drawIndexed) {
pD3Ddevice->DrawIndexedPrimitiveUP(glprim[prim], 0, vertexCount, D3DPrimCount(glprim[prim], numTrans), inds, D3DFMT_INDEX16, mixedVertices, sizeof(MixedVertexFormat));
//pD3Ddevice->SetStreamSource(0, vb_, 0, sizeof(MixedVertexFormat));
//pD3Ddevice->SetIndices(ib_);
//pD3Ddevice->DrawIndexedPrimitive(glprim[prim], 0, 0, 0, 0, D3DPrimCount(glprim[prim], numTrans));
} else {
//pD3Ddevice->DrawPrimitiveUP(glprim[prim], D3DPrimCount(glprim[prim], numTrans), drawBuffer, sizeof(MixedVertexFormat));
}
}
VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) {
auto iter = decoderMap_.find(vtype);
if (iter != decoderMap_.end())
@ -1009,16 +1174,17 @@ u32 TransformDrawEngine::ComputeHash() {
int vertexSize = dec_->GetDecVtxFmt().stride;
// TODO: Add some caps both for numDrawCalls and num verts to check?
// It is really very expensive to check all the vertex data so often.
for (int i = 0; i < numDrawCalls; i++) {
if (!drawCalls[i].inds) {
fullhash += CityHash32((const char *)drawCalls[i].verts, vertexSize * drawCalls[i].vertexCount);
fullhash += XXH32((const char *)drawCalls[i].verts, vertexSize * drawCalls[i].vertexCount, 0x1DE8CAC4);
} else {
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
// we do when drawing.
fullhash += CityHash32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound));
fullhash += XXH32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound), 0x029F3EE1);
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
fullhash += CityHash32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount);
fullhash += XXH32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount, 0x955FD1CA);
}
}
@ -1051,6 +1217,12 @@ void TransformDrawEngine::ClearTrackedVertexArrays() {
}
void TransformDrawEngine::DecimateTrackedVertexArrays() {
if (--decimationCounter_ <= 0) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
} else {
return;
}
int threshold = gpuStats.numFlips - VAI_KILL_AGE;
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
if (iter->second->lastFrame < threshold) {
@ -1082,45 +1254,32 @@ VertexArrayInfo::~VertexArrayInfo() {
}
}
void TransformDrawEngine::Flush() {
if (!numDrawCalls)
return;
void TransformDrawEngine::DoFlush() {
gpuStats.numFlushes++;
gpuStats.numTrackedVertexArrays = (int)vai_.size();
// TODO: This should not be done on every drawcall, we should collect vertex data
// This is not done on every drawcall, we should collect vertex data
// until critical state changes. That's when we draw (flush).
int prim = prevPrim_;
ApplyDrawState(prim);
LinkedShader *program = shaderManager_->ApplyShader(prim);
#if 0 // Not tested !
#if 1 // Not tested !
if (program->useHWTransform_) {
int vertexCount = 0;
bool useElements = true;
// Cannot cache vertex data with morph enabled.
DecodeVerts();
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims();
vertexCount = indexGen.VertexCount();
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
prim = indexGen.Prim();
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
/*
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), decoded);
*/
if (useElements) {
pD3Ddevice->DrawIndexedPrimitiveUP(glprim[prim], 0, vertexCount, D3DPrimCount(glprim[prim], vertexCount), decIndex, D3DFMT_INDEX16, decoded, sizeof(TransformedVertex));
} else {
pD3Ddevice->DrawPrimitiveUP(glprim[prim], D3DPrimCount(glprim[prim], vertexCount), decoded, sizeof(TransformedVertex));
}
// Undo the strip optimization, not supported by the SW code yet.
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
DEBUG_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
MixedTransformAndDraw(
prim, decoded, program, indexGen.VertexCount(),
dec_->VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(),
indexGen.MaxIndex(), 0, 0);
} else
#endif
{

View file

@ -99,7 +99,6 @@ public:
void DrawBezier(int ucount, int vcount);
void DrawSpline(int ucount, int vcount, int utype, int vtype);
void DecodeVerts();
void Flush();
void SetShaderManager(ShaderManager *shaderManager) {
shaderManager_ = shaderManager;
}
@ -121,8 +120,17 @@ public:
// This requires a SetupVertexDecoder call first.
int EstimatePerVertexCost();
// So that this can be inlined
// Cheap inline front end for DoFlush(): the real flush only runs when at least
// one draw call has been queued.
void Flush() {
	if (numDrawCalls != 0) {
		DoFlush();
	}
}
private:
void DoFlush();
void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
void MixedTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, LPDIRECT3DVERTEXBUFFER9 vb_, LPDIRECT3DINDEXBUFFER9 ib_);
void ApplyDrawState(int prim);
bool IsReallyAClear(int numVerts) const;
@ -165,10 +173,9 @@ private:
std::map<u32, VertexArrayInfo *> vai_;
// Vertex buffer objects
// Element buffer objects
enum { NUM_VBOS = 2 };
LPDIRECT3DVERTEXBUFFER9 vbo_[NUM_VBOS];
LPDIRECT3DINDEXBUFFER9 ebo_[NUM_VBOS];
LPDIRECT3DVERTEXBUFFER9 vb[NUM_VBOS];
LPDIRECT3DINDEXBUFFER9 ib[NUM_VBOS];
int curVbo_;
// Other
@ -179,6 +186,9 @@ private:
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
int decimationCounter_;
UVScale *uvScale;
};

View file

@ -126,34 +126,34 @@ enum DoLightComputation {
#if 0 // used for debugging
void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
const char * vscode =
" float4x4 u_proj : register(c0); "
" "
" struct VS_IN "
" "
" { "
" float4 ObjPos : POSITION; "
" float3 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // Vertex color
" float4 C2 : COLOR1; " // Vertex color
" }; "
" "
" struct VS_OUT "
" { "
" float4 ObjPos : POSITION; "
" float4 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // Vertex color
" float4 C2 : COLOR1; " // Vertex color
" }; "
" "
" VS_OUT main( VS_IN In ) "
" { "
" VS_OUT Out; "
" Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj ); " // Transform vertex into
" Out.Uv = float4(In.Uv.xy, 0, In.Uv.z); "
" Out.C1 = In.C1; "
" Out.C2 = In.C2; "
" return Out; " // Transfer color
" } ";
" float4x4 u_proj : register(c0); "
" "
" struct VS_IN "
" "
" { "
" float4 ObjPos : POSITION; "
" float3 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // Vertex color
" float4 C2 : COLOR1; " // Vertex color
" }; "
" "
" struct VS_OUT "
" { "
" float4 ObjPos : POSITION; "
" float4 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // Vertex color
" float4 C2 : COLOR1; " // Vertex color
" }; "
" "
" VS_OUT main( VS_IN In ) "
" { "
" VS_OUT Out; "
" Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj ); " // Transform vertex into
" Out.Uv = float4(In.Uv.xy, 0, In.Uv.z); "
" Out.C1 = In.C1; "
" Out.C2 = In.C2; "
" return Out; " // Transfer color
" } ";
strcpy(buffer, vscode);
}
@ -185,6 +185,9 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
}
}
if (enableFog) {
WRITE(p, "float2 u_fogcoef;\n");
}
if (gstate.isModeThrough()) {
WRITE(p, "float4x4 u_proj_through;\n");
@ -194,54 +197,405 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
}
if (useHWTransform || !hasColor)
WRITE(p, "float4 u_matambientalpha;\n"); // matambient + matalpha
WRITE(p, " struct VS_IN ");
WRITE(p, " ");
WRITE(p, " { ");
WRITE(p, " float4 ObjPos : POSITION; ");
WRITE(p, " float3 Uv : TEXCOORD0; ");
WRITE(p, " float4 C1 : COLOR0; ");
WRITE(p, " float4 C2 : COLOR1; ");
WRITE(p, " }; ");
WRITE(p, " ");
WRITE(p, " struct VS_OUT ");
WRITE(p, " { ");
WRITE(p, " float4 ObjPos : POSITION; ");
WRITE(p, " float4 Uv : TEXCOORD0; ");
WRITE(p, " float4 C1 : COLOR0; ");
WRITE(p, " float4 C2 : COLOR1; ");
if (enableFog) {
WRITE(p, "float v_fogdepth:FOG;\n");
if (useHWTransform) {
WRITE(p, "float4x4 u_world;\n");
WRITE(p, "float4x4 u_view;\n");
if (gstate.getUVGenMode() == 1)
WRITE(p, "float4x4 u_texmtx;\n");
if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) {
int numBones = TranslateNumBones(gstate.getNumBoneWeights());
#ifdef USE_BONE_ARRAY
WRITE(p, "float4x4 u_bone[%i];\n", numBones);
#else
for (int i = 0; i < numBones; i++) {
WRITE(p, "float4x4 u_bone%i;\n", i);
}
#endif
}
if (doTexture) {
WRITE(p, "float4 u_uvscaleoffset;\n");
}
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_OFF) {
// This is needed for shade mapping
WRITE(p, "float3 u_lightpos%i;\n", i);
}
if (doLight[i] == LIGHT_FULL) {
// These are needed for the full thing
WRITE(p, "float3 u_lightdir%i;\n", i);
GELightType type = gstate.getLightType(i);
if (type != GE_LIGHTTYPE_DIRECTIONAL)
WRITE(p, "float3 u_lightatt%i;\n", i);
if (type == GE_LIGHTTYPE_SPOT) {
WRITE(p, "float u_lightangle%i;\n", i);
WRITE(p, "float u_lightspotCoef%i;\n", i);
}
WRITE(p, "float3 u_lightambient%i;\n", i);
WRITE(p, "float3 u_lightdiffuse%i;\n", i);
if (gstate.isUsingSpecularLight(i))
WRITE(p, "float3 u_lightspecular%i;\n", i);
}
}
if (gstate.isLightingEnabled()) {
WRITE(p, "float4 u_ambient;\n");
if ((gstate.materialupdate & 2) == 0)
WRITE(p, "float3 u_matdiffuse;\n");
// if ((gstate.materialupdate & 4) == 0)
WRITE(p, "float4 u_matspecular;\n"); // Specular coef is contained in alpha
WRITE(p, "float3 u_matemissive;\n");
}
}
WRITE(p, " }; ");
WRITE(p, " ");
WRITE(p, " VS_OUT main( VS_IN In ) ");
WRITE(p, " { ");
WRITE(p, " VS_OUT Out; ");
if (1) {
if (useHWTransform) {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
WRITE(p, " float4 ObjPos: POSITION; \n");
WRITE(p, " float3 Uv : TEXCOORD0; \n");
WRITE(p, " float3 Normal: NORMAL; \n");
WRITE(p, " float4 C1 : COLOR0; \n");
WRITE(p, " float4 C2 : COLOR1; \n");
WRITE(p, " }; \n");
WRITE(p, " \n");
WRITE(p, " struct VS_OUT \n");
WRITE(p, " { \n");
WRITE(p, " float4 ObjPos : POSITION; \n");
WRITE(p, " float4 Uv : TEXCOORD0; \n");
WRITE(p, " float4 C1 : COLOR0; \n");
WRITE(p, " float3 C2 : COLOR1; \n");
if (enableFog) {
WRITE(p, "float v_fogdepth:FOG;\n");
}
WRITE(p, " }; \n");
WRITE(p, " \n");
} else {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
WRITE(p, " float4 ObjPos : POSITION; \n");
WRITE(p, " float3 Uv : TEXCOORD0; \n");
WRITE(p, " float4 C1 : COLOR0; \n");
WRITE(p, " float4 C2 : COLOR1; \n");
WRITE(p, " }; \n");
WRITE(p, " \n");
WRITE(p, " struct VS_OUT \n");
WRITE(p, " { \n");
WRITE(p, " float4 ObjPos : POSITION; \n");
WRITE(p, " float4 Uv : TEXCOORD0; \n");
WRITE(p, " float4 C1 : COLOR0; \n");
WRITE(p, " float3 C2 : COLOR1; \n");
if (enableFog) {
WRITE(p, "float v_fogdepth:FOG;\n");
}
WRITE(p, " }; \n");
WRITE(p, " \n");
}
WRITE(p, " VS_OUT main( VS_IN In ) \n");
WRITE(p, " { \n");
WRITE(p, " VS_OUT Out = (VS_OUT)0; \n");
if (useHWTransform) {
// Step 1: World Transform / Skinning
if (gstate.getWeightMask() == GE_VTYPE_WEIGHT_NONE) {
// No skinning, just standard T&L.
WRITE(p, " float3 worldpos = mul(float4(In.ObjPos.xyz, 1.0), u_world).xyz;\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize( mul(float4(In.Normal, 0.0), u_world).xyz);\n");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
} else {
int numWeights = TranslateNumBones(gstate.getNumBoneWeights());
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescale[gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT];
static const char * const boneWeightAttr[8] = {
"a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w",
"a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w",
};
#if defined(USE_FOR_LOOP) && defined(USE_BONE_ARRAY)
// To loop through the weights, we unfortunately need to put them in a float array.
// GLSL ES sucks - no way to directly initialize an array!
switch (numWeights) {
case 1: WRITE(p, " float w[1]; w[0] = a_w1;\n"); break;
case 2: WRITE(p, " float w[2]; w[0] = a_w1.x; w[1] = a_w1.y;\n"); break;
case 3: WRITE(p, " float w[3]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z;\n"); break;
case 4: WRITE(p, " float w[4]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w;\n"); break;
case 5: WRITE(p, " float w[5]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2;\n"); break;
case 6: WRITE(p, " float w[6]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y;\n"); break;
case 7: WRITE(p, " float w[7]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y; w[6] = a_w2.z;\n"); break;
case 8: WRITE(p, " float w[8]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y; w[6] = a_w2.z; w[7] = a_w2.w;\n"); break;
}
WRITE(p, " mat4 skinMatrix = w[0] * u_bone[0];\n");
if (numWeights > 1) {
WRITE(p, " for (int i = 1; i < %i; i++) {\n", numWeights);
WRITE(p, " skinMatrix += w[i] * u_bone[i];\n");
WRITE(p, " }\n");
}
#else
#ifdef USE_BONE_ARRAY
if (numWeights == 1)
WRITE(p, " mat4 skinMatrix = a_w1 * u_bone[0]");
else
WRITE(p, " mat4 skinMatrix = a_w1.x * u_bone[0]");
for (int i = 1; i < numWeights; i++) {
const char *weightAttr = boneWeightAttr[i];
// workaround for "cant do .x of scalar" issue
if (numWeights == 1 && i == 0) weightAttr = "a_w1";
if (numWeights == 5 && i == 4) weightAttr = "a_w2";
WRITE(p, " + %s * u_bone[%i]", weightAttr, i);
}
#else
// Uncomment this to screw up bone shaders to check the vertex shader software fallback
// WRITE(p, "THIS SHOULD ERROR! #error");
if (numWeights == 1)
WRITE(p, " float4x4 skinMatrix = a_w1 * u_bone0");
else
WRITE(p, " float4x4 skinMatrix = a_w1.x * u_bone0");
for (int i = 1; i < numWeights; i++) {
const char *weightAttr = boneWeightAttr[i];
// workaround for "cant do .x of scalar" issue
if (numWeights == 1 && i == 0) weightAttr = "a_w1";
if (numWeights == 5 && i == 4) weightAttr = "a_w2";
WRITE(p, " + %s * u_bone%i", weightAttr, i);
}
#endif
#endif
WRITE(p, ";\n");
// Trying to simplify this results in bugs in LBP...
WRITE(p, " float3 skinnedpos = (skinMatrix * float4(a_position, 1.0)).xyz %s;\n", factor);
WRITE(p, " float3 worldpos = (u_world * float4(skinnedpos, 1.0)).xyz;\n");
if (hasNormal) {
WRITE(p, " float3 skinnednormal = (skinMatrix * float4(a_normal, 0.0)).xyz %s;\n", factor);
WRITE(p, " float3 worldnormal = normalize((u_world * float4(skinnednormal, 0.0)).xyz);\n");
} else {
WRITE(p, " float3 worldnormal = (u_world * (skinMatrix * float4(0.0, 0.0, 1.0, 0.0))).xyz;\n");
}
}
WRITE(p, " float4 viewPos = mul(float4(worldpos, 1.0), u_view);\n");
// Final view and projection transforms.
WRITE(p, " Out.ObjPos = mul(viewPos, u_proj);\n");
// TODO: Declare variables for dots for shade mapping if needed.
const char *ambientStr = (gstate.materialupdate & 1) ? (hasColor ? "In.C1" : "u_matambientalpha") : "u_matambientalpha";
const char *diffuseStr = (gstate.materialupdate & 2) ? (hasColor ? "In.C1.rgb" : "u_matambientalpha.rgb") : "u_matdiffuse";
const char *specularStr = (gstate.materialupdate & 4) ? (hasColor ? "In.C1.rgb" : "u_matambientalpha.rgb") : "u_matspecular.rgb";
bool diffuseIsZero = true;
bool specularIsZero = true;
bool distanceNeeded = false;
if (gstate.isLightingEnabled()) {
WRITE(p, " float4 lightSum0 = u_ambient * %s + float4(u_matemissive, 0.0);\n", ambientStr);
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_FULL)
continue;
diffuseIsZero = false;
if (gstate.isUsingSpecularLight(i))
specularIsZero = false;
GELightType type = gstate.getLightType(i);
if (type != GE_LIGHTTYPE_DIRECTIONAL)
distanceNeeded = true;
}
if (!specularIsZero) {
WRITE(p, " float3 lightSum1 = 0;\n");
}
if (!diffuseIsZero) {
WRITE(p, " float3 toLight;\n");
WRITE(p, " float3 diffuse;\n");
}
if (distanceNeeded) {
WRITE(p, " float distance;\n");
WRITE(p, " float lightScale;\n");
}
}
// Calculate lights if needed. If shade mapping is enabled, lights may need to be
// at least partially calculated.
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_FULL)
continue;
GELightType type = gstate.getLightType(i);
if (type == GE_LIGHTTYPE_DIRECTIONAL) {
// We prenormalize light positions for directional lights.
WRITE(p, " toLight = u_lightpos%i;\n", i);
} else {
WRITE(p, " toLight = u_lightpos%i - worldpos;\n", i);
WRITE(p, " distance = length(toLight);\n");
WRITE(p, " toLight /= distance;\n");
}
bool doSpecular = gstate.isUsingSpecularLight(i);
bool poweredDiffuse = gstate.isUsingPoweredDiffuseLight(i);
if (poweredDiffuse) {
WRITE(p, " float dot%i = pow(dot(toLight, worldnormal), u_matspecular.a);\n", i);
} else {
WRITE(p, " float dot%i = dot(toLight, worldnormal);\n", i);
}
const char *timesLightScale = " * lightScale";
// Attenuation
switch (type) {
case GE_LIGHTTYPE_DIRECTIONAL:
timesLightScale = "";
break;
case GE_LIGHTTYPE_POINT:
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, float3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
break;
case GE_LIGHTTYPE_SPOT:
WRITE(p, " lowp float angle%i = dot(normalize(u_lightdir%i), toLight);\n", i, i);
WRITE(p, " if (angle%i >= u_lightangle%i) {\n", i, i);
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, float3(1.0, distance, distance*distance)), 0.0, 1.0) * pow(angle%i, u_lightspotCoef%i);\n", i, i, i);
WRITE(p, " } else {\n");
WRITE(p, " lightScale = 0.0;\n");
WRITE(p, " }\n");
break;
default:
// ILLEGAL
break;
}
WRITE(p, " diffuse = (u_lightdiffuse%i * %s) * max(dot%i, 0.0);\n", i, diffuseStr, i);
if (doSpecular) {
WRITE(p, " dot%i = dot(normalize(toLight + float3(0.0, 0.0, 1.0)), worldnormal);\n", i);
WRITE(p, " if (dot%i > 0.0)\n", i);
WRITE(p, " lightSum1 += u_lightspecular%i * %s * (pow(dot%i, u_matspecular.a) %s);\n", i, specularStr, i, timesLightScale);
}
WRITE(p, " lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale);
}
if (gstate.isLightingEnabled()) {
// Sum up ambient, emissive here.
if (lmode) {
WRITE(p, " Out.C1 = clamp(lightSum0, 0.0, 1.0);\n");
// v_color1 only exists when lmode = 1.
if (specularIsZero) {
WRITE(p, " Out.C2 = 0;\n");
} else {
WRITE(p, " Out.C2 = clamp(lightSum1, 0.0, 1.0);\n");
}
} else {
if (specularIsZero) {
WRITE(p, " Out.C1 = clamp(lightSum0, 0.0, 1.0);\n");
} else {
WRITE(p, " Out.C1 = clamp(clamp(lightSum0, 0.0, 1.0) + float4(lightSum1, 0.0), 0.0, 1.0);\n");
}
}
} else {
// Lighting doesn't affect color.
if (hasColor) {
WRITE(p, " Out.C1 = In.C1;\n");
} else {
WRITE(p, " Out.C1 = u_matambientalpha;\n");
}
if (lmode)
WRITE(p, " Out.C2 = 0.0;\n");
}
// Step 3: UV generation
if (doTexture) {
bool prescale = g_Config.bPrescaleUV && !throughmode && gstate.getTextureFunction() == 0;
switch (gstate.getUVGenMode()) {
case 0: // Scale-offset. Easy.
if (prescale) {
WRITE(p, " Out.Uv = In.Uv;\n");
} else {
WRITE(p, " Out.Uv.xy = In.Uv.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw;\n");
}
break;
case 1: // Projection mapping.
{
std::string temp_tc;
switch (gstate.getUVProjMode()) {
case 0: // Use model space XYZ as source
temp_tc = "float4(a_position.xyz, 1.0)";
break;
case 1: // Use unscaled UV as source
{
static const char *rescaleuv[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescaleuv[(vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT];
temp_tc = StringFromFormat("float4(a_texcoord.xy %s, 0.0, 1.0)", factor);
}
break;
case 2: // Use normalized transformed normal as source
if (hasNormal)
temp_tc = "float4(normalize(a_normal), 1.0)";
else
temp_tc = "float4(0.0, 0.0, 1.0, 1.0)";
break;
case 3: // Use non-normalized transformed normal as source
if (hasNormal)
temp_tc = "float4(a_normal, 1.0)";
else
temp_tc = "float4(0.0, 0.0, 1.0, 1.0)";
break;
}
WRITE(p, " Out.Uv.xyz = (u_texmtx * %s).xyz * float3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
break;
case 2: // Shade mapping - use dots from light sources.
WRITE(p, " Out.Uv.xy = u_uvscaleoffset.xy * float2(1.0 + dot(normalize(u_lightpos%i), worldnormal), 1.0 - dot(normalize(u_lightpos%i), worldnormal)) * 0.5;\n", gstate.getUVLS0(), gstate.getUVLS1());
break;
default:
// ILLEGAL
break;
}
if (flipV)
WRITE(p, " Out.Uv.y = 1.0 - Out.Uv.y;\n");
}
// Compute fogdepth
if (enableFog)
WRITE(p, " Out.v_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n");
WRITE(p, " return Out; ");
} else {
// Simple pass-through of vertex data to fragment shader
if (gstate.isModeThrough()) {
WRITE(p, "Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj_through );");
//WRITE(p, "Out.ObjPos.z = ((1+Out.ObjPos.z)/2);"); // Dx z versus opengl z
} else {
//WRITE(p, " Out.ObjPos = mul( u_proj, float4(In.ObjPos.xyz, 1) );");
WRITE(p, "Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj );");
//WRITE(p, "Out.ObjPos.z = ((1+Out.ObjPos.z)/2);"); // Dx z versus opengl z
}
//WRITE(p, "Out.Uv = In.Uv;");
WRITE(p, "Out.Uv = float4(In.Uv.xy, 0, In.Uv.z);");
if (hasColor) {
WRITE(p, "Out.C1 = In.C1;");
WRITE(p, "Out.C2 = In.C2;");
} else {
WRITE(p, " Out.C1 = u_matambientalpha;\n");
WRITE(p, " Out.C2 = float4(0,0,0,0);\n");
}
if (enableFog) {
WRITE(p, " Out.v_fogdepth = In.ObjPos.w;\n");
}
WRITE(p, " return Out; ");
WRITE(p, "Out.Uv = float4(In.Uv.xy, 0, In.Uv.z);");
if (hasColor) {
WRITE(p, "Out.C1 = In.C1;");
WRITE(p, "Out.C2 = In.C2.rgb;");
} else {
WRITE(p, " Out.C1 = u_matambientalpha;\n");
WRITE(p, " Out.C2 = float3(0,0,0);\n");
}
if (enableFog) {
WRITE(p, " Out.v_fogdepth = In.ObjPos.w;\n");
}
WRITE(p, " return Out; ");
}
WRITE(p, "}\n");
}