Reduce the diff between the two draw engines

This commit is contained in:
Henrik Rydgard 2014-09-18 00:40:25 +02:00
parent 9755f6ba49
commit 6c313385ab
4 changed files with 57 additions and 72 deletions

View file

@ -83,25 +83,19 @@ enum {
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
// Check for max first as clamping to max is more common than min when lighting.
inline float clamp(float in, float min, float max) {
	// The upper bound is tested before the lower one; a value that is neither
	// above max nor below min (including NaN) is returned untouched.
	if (in > max) {
		return max;
	}
	if (in < min) {
		return min;
	}
	return in;
}
TransformDrawEngineDX9::TransformDrawEngineDX9()
:
decodedVerts_(0),
prevPrim_(GE_PRIM_INVALID),
dec_(0),
lastVType_(-1),
shaderManager_(0),
textureCache_(0),
framebufferManager_(0),
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
dcid_(0),
uvScale(0) {
: decodedVerts_(0),
prevPrim_(GE_PRIM_INVALID),
dec_(0),
lastVType_(-1),
shaderManager_(0),
textureCache_(0),
framebufferManager_(0),
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
dcid_(0),
uvScale(0) {
memset(&decOptions_, 0, sizeof(decOptions_));
decOptions_.expandAllUVtoFloat = true;
@ -126,7 +120,7 @@ TransformDrawEngineDX9::TransformDrawEngineDX9()
quadIndices_[i * 6 + 4] = i * 4 + 2;
quadIndices_[i * 6 + 5] = i * 4 + 3;
}
if (g_Config.bPrescaleUV) {
uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS];
}
@ -143,6 +137,9 @@ TransformDrawEngineDX9::~TransformDrawEngineDX9() {
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
delete[] quadIndices_;
delete decJitCache_;
for (auto decl = vertexDeclMap_.begin(); decl != vertexDeclMap_.end(); ++decl) {
if (decl->second) {
@ -150,15 +147,10 @@ TransformDrawEngineDX9::~TransformDrawEngineDX9() {
}
}
delete [] quadIndices_;
for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); iter++) {
delete iter->second;
}
delete [] uvScale;
delete decJitCache_;
}
void TransformDrawEngineDX9::InitDeviceObjects() {
@ -255,7 +247,7 @@ IDirect3DVertexDeclaration9 *TransformDrawEngineDX9::SetupDecFmtForDraw(VSShader
// End
D3DVERTEXELEMENT9 end = D3DDECL_END();
memcpy(VertexElement, &end, sizeof(D3DVERTEXELEMENT9));
// Create declaration
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = nullptr;
HRESULT hr = pD3Ddevice->CreateVertexDeclaration( VertexElements, &pHardwareVertexDecl );
@ -299,34 +291,6 @@ inline void TransformDrawEngineDX9::SetupVertexDecoderInternal(u32 vertType) {
}
}
int TransformDrawEngineDX9::EstimatePerVertexCost() {
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
// runs in parallel with transform.
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
// went too fast and starts doing all the work over again).
int cost = 20;
if (gstate.isLightingEnabled()) {
cost += 10;
}
for (int i = 0; i < 4; i++) {
if (gstate.isLightChanEnabled(i))
cost += 10;
}
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
cost += 20;
}
if (dec_ && dec_->morphcount > 1) {
cost += 5 * dec_->morphcount;
}
return cost;
}
void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
if (vertexCount == 0)
return; // we ignore zero-sized draw calls.
@ -748,16 +712,12 @@ void TransformDrawEngineDX9::DoFlush() {
if (!useElements && indexGen.PureCount()) {
vai->numVerts = indexGen.PureCount();
}
// Always
if (1) {
void * pVb;
u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
pD3Ddevice->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vai->vbo, NULL);
vai->vbo->Lock(0, size, &pVb, 0);
memcpy(pVb, decoded, size);
vai->vbo->Unlock();
}
// Ib
void * pVb;
u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
pD3Ddevice->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vai->vbo, NULL);
vai->vbo->Lock(0, size, &pVb, 0);
memcpy(pVb, decoded, size);
vai->vbo->Unlock();
if (useElements) {
void * pIb;
u32 size = sizeof(short) * indexGen.VertexCount();

View file

@ -106,7 +106,7 @@ class TransformDrawEngineDX9 : public DrawEngineCommon {
public:
TransformDrawEngineDX9();
virtual ~TransformDrawEngineDX9();
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType);
void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType);
@ -132,9 +132,35 @@ public:
void SetupVertexDecoder(u32 vertType);
void SetupVertexDecoderInternal(u32 vertType);
bool IsCodePtrVertexDecoder(const u8 *ptr) const;
// This requires a SetupVertexDecoder call first.
int EstimatePerVertexCost();
int EstimatePerVertexCost() {
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
// runs in parallel with transform.
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
// went too fast and starts doing all the work over again).
int cost = 20;
if (gstate.isLightingEnabled()) {
cost += 10;
for (int i = 0; i < 4; i++) {
if (gstate.isLightChanEnabled(i))
cost += 10;
}
}
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
cost += 20;
}
if (dec_ && dec_->morphcount > 1) {
cost += 5 * dec_->morphcount;
}
return cost;
}
// So that this can be inlined
void Flush() {
@ -143,6 +169,8 @@ public:
DoFlush();
}
bool IsCodePtrVertexDecoder(const u8 *ptr) const;
protected:
// Preprocessing for spline/bezier
virtual u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType) override;
@ -159,7 +187,7 @@ private:
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
u32 ComputeMiniHash();
u32 ComputeHash(); // Reads deferred vertex data.
void MarkUnreliable(VertexArrayInfoDX9 *vai);

View file

@ -142,8 +142,8 @@ TransformDrawEngine::TransformDrawEngine()
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE);
transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE);
transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
for (int i = 0; i < QUAD_INDICES_MAX; i++) {
quadIndices_[i * 6 + 0] = i * 4;
quadIndices_[i * 6 + 1] = i * 4 + 2;
@ -609,12 +609,10 @@ void TransformDrawEngine::DoFlush() {
Shader *vshader = shaderManager_->ApplyVertexShader(prim, lastVType_);
// Compiler warns about this because it's only used in the #ifdeffed out RangeElements path.
int maxIndex = 0;
if (vshader->UseHWTransform()) {
GLuint vbo = 0, ebo = 0;
int vertexCount = 0;
int maxIndex = 0; // Compiler warns about this because it's only used in the #ifdeffed out RangeElements path.
bool useElements = true;
// Cannot cache vertex data with morph enabled.

View file

@ -169,7 +169,6 @@ public:
bool IsCodePtrVertexDecoder(const u8 *ptr) const;
// Really just for convenience to share with softgpu.
static u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
protected: