diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index e79894aeb5..51b6872b34 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -83,25 +83,19 @@ enum { enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 }; -// Check for max first as clamping to max is more common than min when lighting. -inline float clamp(float in, float min, float max) { - return in > max ? max : (in < min ? min : in); -} - TransformDrawEngineDX9::TransformDrawEngineDX9() - : - decodedVerts_(0), - prevPrim_(GE_PRIM_INVALID), - dec_(0), - lastVType_(-1), - shaderManager_(0), - textureCache_(0), - framebufferManager_(0), - numDrawCalls(0), - vertexCountInDrawCalls(0), - decodeCounter_(0), - dcid_(0), - uvScale(0) { + : decodedVerts_(0), + prevPrim_(GE_PRIM_INVALID), + dec_(0), + lastVType_(-1), + shaderManager_(0), + textureCache_(0), + framebufferManager_(0), + numDrawCalls(0), + vertexCountInDrawCalls(0), + decodeCounter_(0), + dcid_(0), + uvScale(0) { memset(&decOptions_, 0, sizeof(decOptions_)); decOptions_.expandAllUVtoFloat = true; @@ -126,7 +120,7 @@ TransformDrawEngineDX9::TransformDrawEngineDX9() quadIndices_[i * 6 + 4] = i * 4 + 2; quadIndices_[i * 6 + 5] = i * 4 + 3; } - + if (g_Config.bPrescaleUV) { uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS]; } @@ -143,6 +137,9 @@ TransformDrawEngineDX9::~TransformDrawEngineDX9() { FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE); FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE); FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE); + delete[] quadIndices_; + + delete decJitCache_; for (auto decl = vertexDeclMap_.begin(); decl != vertexDeclMap_.end(); ++decl) { if (decl->second) { @@ -150,15 +147,10 @@ TransformDrawEngineDX9::~TransformDrawEngineDX9() { } } - delete [] quadIndices_; - for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); iter++) { delete iter->second; } delete [] uvScale; - - delete decJitCache_; - } void TransformDrawEngineDX9::InitDeviceObjects() { @@ -255,7 +247,7 @@ IDirect3DVertexDeclaration9 *TransformDrawEngineDX9::SetupDecFmtForDraw(VSShader // End D3DVERTEXELEMENT9 end = D3DDECL_END(); memcpy(VertexElement, &end, sizeof(D3DVERTEXELEMENT9)); - + // Create declaration IDirect3DVertexDeclaration9 *pHardwareVertexDecl = nullptr; HRESULT hr = pD3Ddevice->CreateVertexDeclaration( VertexElements, &pHardwareVertexDecl ); @@ -299,34 +291,6 @@ inline void TransformDrawEngineDX9::SetupVertexDecoderInternal(u32 vertType) { } } -int TransformDrawEngineDX9::EstimatePerVertexCost() { - // TODO: This is transform cost, also account for rasterization cost somehow... although it probably - // runs in parallel with transform. - - // Also, this is all pure guesswork. If we can find a way to do measurements, that would be great. - - // GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things - // went too fast and starts doing all the work over again). - - int cost = 20; - if (gstate.isLightingEnabled()) { - cost += 10; - } - - for (int i = 0; i < 4; i++) { - if (gstate.isLightChanEnabled(i)) - cost += 10; - } - if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) { - cost += 20; - } - if (dec_ && dec_->morphcount > 1) { - cost += 5 * dec_->morphcount; - } - - return cost; -} - void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { if (vertexCount == 0) return; // we ignore zero-sized draw calls. @@ -748,16 +712,12 @@ void TransformDrawEngineDX9::DoFlush() { if (!useElements && indexGen.PureCount()) { vai->numVerts = indexGen.PureCount(); } - // Always - if (1) { - void * pVb; - u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); - pD3Ddevice->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vai->vbo, NULL); - vai->vbo->Lock(0, size, &pVb, 0); - memcpy(pVb, decoded, size); - vai->vbo->Unlock(); - } - // Ib + void * pVb; + u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); + pD3Ddevice->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vai->vbo, NULL); + vai->vbo->Lock(0, size, &pVb, 0); + memcpy(pVb, decoded, size); + vai->vbo->Unlock(); if (useElements) { void * pIb; u32 size = sizeof(short) * indexGen.VertexCount(); diff --git a/GPU/Directx9/TransformPipelineDX9.h b/GPU/Directx9/TransformPipelineDX9.h index dd8afb70a6..d0500b8e98 100644 --- a/GPU/Directx9/TransformPipelineDX9.h +++ b/GPU/Directx9/TransformPipelineDX9.h @@ -106,7 +106,7 @@ class TransformDrawEngineDX9 : public DrawEngineCommon { public: TransformDrawEngineDX9(); virtual ~TransformDrawEngineDX9(); - + void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType); void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); @@ -132,9 +132,35 @@ public: void SetupVertexDecoder(u32 vertType); void SetupVertexDecoderInternal(u32 vertType); - bool IsCodePtrVertexDecoder(const u8 *ptr) const; // This requires a SetupVertexDecoder call first. - int EstimatePerVertexCost(); + int EstimatePerVertexCost() { + // TODO: This is transform cost, also account for rasterization cost somehow... although it probably + // runs in parallel with transform. + + // Also, this is all pure guesswork. If we can find a way to do measurements, that would be great. + + // GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things + // went too fast and starts doing all the work over again). + + int cost = 20; + if (gstate.isLightingEnabled()) { + cost += 10; + + for (int i = 0; i < 4; i++) { + if (gstate.isLightChanEnabled(i)) + cost += 10; + } + } + + if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) { + cost += 20; + } + if (dec_ && dec_->morphcount > 1) { + cost += 5 * dec_->morphcount; + } + + return cost; + } // So that this can be inlined void Flush() { @@ -143,6 +169,8 @@ public: DoFlush(); } + bool IsCodePtrVertexDecoder(const u8 *ptr) const; + protected: // Preprocessing for spline/bezier virtual u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType) override; @@ -159,7 +187,7 @@ private: // Preprocessing for spline/bezier u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType); - + u32 ComputeMiniHash(); u32 ComputeHash(); // Reads deferred vertex data. void MarkUnreliable(VertexArrayInfoDX9 *vai); diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 0d2d303f32..2ae5fc3f77 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -142,8 +142,8 @@ TransformDrawEngine::TransformDrawEngine() decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE); transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE); transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE); - quadIndices_ = new u16[6 * QUAD_INDICES_MAX]; + quadIndices_ = new u16[6 * QUAD_INDICES_MAX]; for (int i = 0; i < QUAD_INDICES_MAX; i++) { quadIndices_[i * 6 + 0] = i * 4; quadIndices_[i * 6 + 1] = i * 4 + 2; @@ -609,12 +609,10 @@ void TransformDrawEngine::DoFlush() { Shader *vshader = shaderManager_->ApplyVertexShader(prim, lastVType_); - // Compiler warns about this because it's only used in the #ifdeffed out RangeElements path. - int maxIndex = 0; - if (vshader->UseHWTransform()) { GLuint vbo = 0, ebo = 0; int vertexCount = 0; + int maxIndex = 0; // Compiler warns about this because it's only used in the #ifdeffed out RangeElements path. bool useElements = true; // Cannot cache vertex data with morph enabled. diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index badbcd675d..d26c953262 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -169,7 +169,6 @@ public: bool IsCodePtrVertexDecoder(const u8 *ptr) const; - // Really just for convenience to share with softgpu. static u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType); protected: