diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 1e4a2463a5..1e447f6618 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -1,20 +1,109 @@ -// Copyright (c) 2013- PPSSPP Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official git repository and contact information can be found at -// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. - -#include "GPU/Common/DrawEngineCommon.h" - +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "GPU/Common/DrawEngineCommon.h" +#include "GPU/Common/SplineCommon.h" +#include "GPU/ge_constants.h" +#include "GPU/GPUState.h" + DrawEngineCommon::~DrawEngineCommon() { } + +struct Plane { + float x, y, z, w; + void Set(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } + float Test(float f[3]) const { return x * f[0] + y * f[1] + z * f[2] + w; } +}; + +static void PlanesFromMatrix(float mtx[16], Plane planes[6]) { + planes[0].Set(mtx[3]-mtx[0], mtx[7]-mtx[4], mtx[11]-mtx[8], mtx[15]-mtx[12]); // Right + planes[1].Set(mtx[3]+mtx[0], mtx[7]+mtx[4], mtx[11]+mtx[8], mtx[15]+mtx[12]); // Left + planes[2].Set(mtx[3]+mtx[1], mtx[7]+mtx[5], mtx[11]+mtx[9], mtx[15]+mtx[13]); // Bottom + planes[3].Set(mtx[3]-mtx[1], mtx[7]-mtx[5], mtx[11]-mtx[9], mtx[15]-mtx[13]); // Top + planes[4].Set(mtx[3]+mtx[2], mtx[7]+mtx[6], mtx[11]+mtx[10], mtx[15]+mtx[14]); // Near + planes[5].Set(mtx[3]-mtx[2], mtx[7]-mtx[6], mtx[11]-mtx[10], mtx[15]-mtx[14]); // Far +} + +// This code is HIGHLY unoptimized! +// +// It does the simplest and safest test possible: If all points of a bbox is outside a single of +// our clipping planes, we reject the box. Tighter bounds would be desirable but would take more calculations. +bool DrawEngineCommon::TestBoundingBox(void* control_points, int vertexCount, u32 vertType) { + SimpleVertex *corners = (SimpleVertex *)(decoded + 65536 * 12); + float *verts = (float *)(decoded + 65536 * 18); + + // Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder + // and a large vertex format. + if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_FLOAT) { + // memcpy(verts, control_points, 12 * vertexCount); + verts = (float *)control_points; + } else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_8BIT) { + const s8 *vtx = (const s8 *)control_points; + for (int i = 0; i < vertexCount * 3; i++) { + verts[i] = vtx[i] * (1.0f / 128.0f); + } + } else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT) { + const s16 *vtx = (const s16*)control_points; + for (int i = 0; i < vertexCount * 3; i++) { + verts[i] = vtx[i] * (1.0f / 32768.0f); + } + } else { + // Simplify away bones and morph before proceeding + u8 *temp_buffer = decoded + 65536 * 24; + NormalizeVertices((u8 *)corners, temp_buffer, (u8 *)control_points, 0, vertexCount, vertType); + // Special case for float positions only. + const float *ctrl = (const float *)control_points; + for (int i = 0; i < vertexCount; i++) { + verts[i * 3] = corners[i].pos.x; + verts[i * 3 + 1] = corners[i].pos.y; + verts[i * 3 + 2] = corners[i].pos.z; + } + } + + Plane planes[6]; + + float world[16]; + float view[16]; + float worldview[16]; + float worldviewproj[16]; + ConvertMatrix4x3To4x4(world, gstate.worldMatrix); + ConvertMatrix4x3To4x4(view, gstate.viewMatrix); + Matrix4ByMatrix4(worldview, world, view); + Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix); + PlanesFromMatrix(worldviewproj, planes); + for (int plane = 0; plane < 6; plane++) { + int inside = 0; + int out = 0; + for (int i = 0; i < vertexCount; i++) { + // Here we can test against the frustum planes! + float value = planes[plane].Test(verts + i * 3); + if (value < 0) + out++; + else + inside++; + } + + if (inside == 0) { + // All out + return false; + } + + // Any out. For testing that the planes are in the right locations. + // if (out != 0) return false; + } + + return true; +} diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 909bbdcb56..07229675c9 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -1,24 +1,35 @@ -// Copyright (c) 2013- PPSSPP Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official git repository and contact information can be found at -// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +// Copyright (c) 2013- PPSSPP Project. +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #pragma once +#include "Common/CommonTypes.h" + class DrawEngineCommon { public: virtual ~DrawEngineCommon(); + + bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType); + + // TODO: This can be shared once the decoder cache / etc. are. + virtual u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType) = 0; + +protected: + // Vertex collector buffers + u8 *decoded; + u16 *decIndex; }; \ No newline at end of file diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 83ddda14f1..8f66b2f0dc 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -314,7 +314,7 @@ static const CommandTableEntry commandTable[] = { {GE_CMD_VADDR, FLAG_EXECUTE, &DIRECTX9_GPU::Execute_Vaddr}, {GE_CMD_IADDR, FLAG_EXECUTE, &DIRECTX9_GPU::Execute_Iaddr}, {GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // EXECUTE - {GE_CMD_BOUNDINGBOX, FLAG_EXECUTE}, // + FLUSHBEFORE when we implement + {GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, &DIRECTX9_GPU::Execute_BoundingBox}, // + FLUSHBEFORE when we implement... or not, do we need to? // Changing the vertex type requires us to flush. {GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_VertexType}, @@ -868,6 +868,32 @@ void DIRECTX9_GPU::Execute_ViewportType(u32 op, u32 diff) { } } +void DIRECTX9_GPU::Execute_BoundingBox(u32 op, u32 diff) { + // Just resetting, nothing to bound. + const u32 data = op & 0x00FFFFFF; + if (data == 0) { + // TODO: Should this set the bboxResult? Let's set it true for now. + currentList->bboxResult = true; + return; + } + if (((data & 7) == 0) && data <= 64) { // Sanity check + void *control_points = Memory::GetPointer(gstate_c.vertexAddr); + if (gstate.vertType & GE_VTYPE_IDX_MASK) { + ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Indexed bounding box data not supported."); + // Data seems invalid. Let's assume the box test passed. + currentList->bboxResult = true; + return; + } + + // Test if the bounding box is within the drawing region. + currentList->bboxResult = transformDraw_.TestBoundingBox(control_points, data, gstate.vertType); + } else { + ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Bad bounding box data: %06x", data); + // Data seems invalid. Let's assume the box test passed. + currentList->bboxResult = true; + } +} + void DIRECTX9_GPU::Execute_Region(u32 op, u32 diff) { gstate_c.framebufChanged = true; gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; @@ -1285,28 +1311,7 @@ void DIRECTX9_GPU::Execute_Generic(u32 op, u32 diff) { break; case GE_CMD_BOUNDINGBOX: - // Just resetting, nothing to bound. - if (data == 0) { - // TODO: Should this set the bboxResult? Let's set it true for now. - currentList->bboxResult = true; - break; - } - if ((data % 8 == 0) && data < 64) { // Sanity check - void *control_points = Memory::GetPointer(gstate_c.vertexAddr); - if (gstate.vertType & GE_VTYPE_IDX_MASK) { - ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Indexed bounding box data not supported."); - // Data seems invalid. Let's assume the box test passed. - currentList->bboxResult = true; - break; - } - - // Test if the bounding box is within the drawing region. - currentList->bboxResult = transformDraw_.TestBoundingBox(control_points, data, gstate.vertType); - } else { - ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Bad bounding box data: %06x", data); - // Data seems invalid. Let's assume the box test passed. - currentList->bboxResult = true; - } + Execute_BoundingBox(op, diff); break; case GE_CMD_REGION1: diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index f53b24ecaa..1ca5063d56 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -90,6 +90,7 @@ public: void Execute_Prim(u32 op, u32 diff); void Execute_Bezier(u32 op, u32 diff); void Execute_Spline(u32 op, u32 diff); + void Execute_BoundingBox(u32 op, u32 diff); void Execute_VertexType(u32 op, u32 diff); void Execute_VertexTypeSkinning(u32 op, u32 diff); void Execute_Region(u32 op, u32 diff); diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 05fcbb5e1d..1d858024b4 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -81,6 +81,8 @@ enum { #define VERTEXCACHE_DECIMATION_INTERVAL 17 +enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 }; + // Check for max first as clamping to max is more common than min when lighting. inline float clamp(float in, float min, float max) { return in > max ? max : (in < min ? min : in); @@ -275,13 +277,12 @@ VertexDecoder *TransformDrawEngineDX9::GetVertexDecoder(u32 vtype) { auto iter = decoderMap_.find(vtype); if (iter != decoderMap_.end()) return iter->second; - VertexDecoder*dec = new VertexDecoder(); + VertexDecoder *dec = new VertexDecoder(); dec->SetVertexType(vtype, decOptions_, decJitCache_); decoderMap_[vtype] = dec; return dec; } - void TransformDrawEngineDX9::SetupVertexDecoder(u32 vertType) { SetupVertexDecoderInternal(vertType); } @@ -396,6 +397,27 @@ void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType } } +void TransformDrawEngineDX9::DecodeVerts() { + if (uvScale) { + const UVScale origUV = gstate_c.uv; + for (; decodeCounter_ < numDrawCalls; decodeCounter_++) { + gstate_c.uv = uvScale[decodeCounter_]; + DecodeVertsStep(); + } + gstate_c.uv = origUV; + } else { + for (; decodeCounter_ < numDrawCalls; decodeCounter_++) { + DecodeVertsStep(); + } + } + // Sanity check + if (indexGen.Prim() < 0) { + ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim()); + // Force to points (0) + indexGen.AddPrim(GE_PRIM_POINTS, 0); + } +} + void TransformDrawEngineDX9::DecodeVertsStep() { const int i = decodeCounter_; @@ -470,31 +492,66 @@ void TransformDrawEngineDX9::DecodeVertsStep() { } } +inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) { + // Switch to u32 units. + const u32 *p = (const u32 *)ptr; + sz >>= 2; -void TransformDrawEngineDX9::DecodeVerts() { - if (uvScale) { - const UVScale origUV = gstate_c.uv; - for (; decodeCounter_ < numDrawCalls; decodeCounter_++) { - gstate_c.uv = uvScale[decodeCounter_]; - DecodeVertsStep(); + if (sz > 100) { + size_t step = sz / 4; + u32 hash = 0; + for (size_t i = 0; i < sz; i += step) { + hash += DoReliableHash(p + i, 100, 0x3A44B9C4); } - gstate_c.uv = origUV; + return hash; } else { - for (; decodeCounter_ < numDrawCalls; decodeCounter_++) { - DecodeVertsStep(); + return p[0] + p[sz - 1]; + } +} + +u32 TransformDrawEngineDX9::ComputeMiniHash() { + u32 fullhash = 0; + const int vertexSize = dec_->GetDecVtxFmt().stride; + const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1; + + int step; + if (numDrawCalls < 3) { + step = 1; + } else if (numDrawCalls < 8) { + step = 4; + } else { + step = numDrawCalls / 8; + } + for (int i = 0; i < numDrawCalls; i += step) { + const DeferredDrawCall &dc = drawCalls[i]; + if (!dc.inds) { + fullhash += ComputeMiniHashRange(dc.verts, vertexSize * dc.vertexCount); + } else { + int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound; + fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound)); + fullhash += ComputeMiniHashRange(dc.inds, indexSize * dc.vertexCount); } } - // Sanity check - if (indexGen.Prim() < 0) { - ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim()); - // Force to points (0) - indexGen.AddPrim(GE_PRIM_POINTS, 0); + + return fullhash; +} + +void TransformDrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) { + vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE; + if (vai->vbo) { + vai->vbo->Release(); + vai->vbo = nullptr; + } + if (vai->ebo) { + vai->ebo->Release(); + vai->ebo = nullptr; } } u32 TransformDrawEngineDX9::ComputeHash() { u32 fullhash = 0; - int vertexSize = dec_->GetDecVtxFmt().stride; + const int vertexSize = dec_->GetDecVtxFmt().stride; + const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1; // TODO: Add some caps both for numDrawCalls and num verts to check? // It is really very expensive to check all the vertex data so often. @@ -518,7 +575,6 @@ u32 TransformDrawEngineDX9::ComputeHash() { // we do when drawing. fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1); - int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1; // Hm, we will miss some indices when combining above, but meh, it should be fine. fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA); i = lastMatch; @@ -531,24 +587,6 @@ u32 TransformDrawEngineDX9::ComputeHash() { return fullhash; } -u32 TransformDrawEngineDX9::ComputeFastDCID() { - u32 hash = 0; - for (int i = 0; i < numDrawCalls; i++) { - hash ^= (u32)(uintptr_t)drawCalls[i].verts; - hash = __rotl(hash, 13); - hash ^= (u32)(uintptr_t)drawCalls[i].inds; - hash = __rotl(hash, 13); - hash ^= (u32)drawCalls[i].vertType; - hash = __rotl(hash, 13); - hash ^= (u32)drawCalls[i].vertexCount; - hash = __rotl(hash, 13); - hash ^= (u32)drawCalls[i].prim; - } - return hash; -} - -enum { VAI_KILL_AGE = 120 }; - void TransformDrawEngineDX9::ClearTrackedVertexArrays() { for (auto vai = vai_.begin(); vai != vai_.end(); vai++) { delete vai->second; @@ -563,14 +601,23 @@ void TransformDrawEngineDX9::DecimateTrackedVertexArrays() { return; } - int threshold = gpuStats.numFlips - VAI_KILL_AGE; + const int threshold = gpuStats.numFlips - VAI_KILL_AGE; + const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE; + int unreliableLeft = VAI_UNRELIABLE_KILL_MAX; for (auto iter = vai_.begin(); iter != vai_.end(); ) { - if (iter->second->lastFrame < threshold) { + bool kill; + if (iter->second->status == VertexArrayInfoDX9::VAI_UNRELIABLE) { + // We limit killing unreliable so we don't rehash too often. + kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0; + } else { + kill = iter->second->lastFrame < threshold; + } + if (kill) { delete iter->second; vai_.erase(iter++); - } - else + } else { ++iter; + } } // Enable if you want to see vertex decoders in the log output. Need a better way. @@ -596,7 +643,6 @@ VertexArrayInfoDX9::~VertexArrayInfoDX9() { void TransformDrawEngineDX9::DoFlush() { gpuStats.numFlushes++; - gpuStats.numTrackedVertexArrays = (int)vai_.size(); // This is not done on every drawcall, we should collect vertex data @@ -622,7 +668,7 @@ void TransformDrawEngineDX9::DoFlush() { useCache = false; if (useCache) { - u32 id = ComputeFastDCID(); + u32 id = dcid_; auto iter = vai_.find(id); VertexArrayInfoDX9 *vai; if (iter != vai_.end()) { @@ -639,6 +685,7 @@ void TransformDrawEngineDX9::DoFlush() { // Haven't seen this one before. u32 dataHash = ComputeHash(); vai->hash = dataHash; + vai->minihash = ComputeMiniHash(); vai->status = VertexArrayInfoDX9::VAI_HASHING; vai->drawsUntilNextFullHash = 0; DecodeVerts(); // writes to indexGen @@ -659,21 +706,18 @@ void TransformDrawEngineDX9::DoFlush() { vai->numFrames++; } if (vai->drawsUntilNextFullHash == 0) { - u32 newHash = ComputeHash(); - if (newHash != vai->hash) { - vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE; - if (vai->vbo) { - vai->vbo->Release(); - vai->vbo = NULL; - } - if (vai->ebo) { - vai->ebo->Release(); - vai->ebo = NULL; - } + // Let's try to skip a full hash if mini would fail. + const u32 newMiniHash = ComputeMiniHash(); + u32 newHash = vai->hash; + if (newMiniHash == vai->minihash) { + newHash = ComputeHash(); + } + if (newMiniHash != vai->minihash || newHash != vai->hash) { + MarkUnreliable(vai); DecodeVerts(); goto rotateVBO; } - if (vai->numVerts > 100) { + if (vai->numVerts > 64) { // exponential backoff up to 16 draws, then every 24 vai->drawsUntilNextFullHash = std::min(24, vai->numFrames); } else { @@ -686,7 +730,12 @@ void TransformDrawEngineDX9::DoFlush() { //} } else { vai->drawsUntilNextFullHash--; - // TODO: "mini-hashing" the first 32 bytes of the vertex/index data or something. + u32 newMiniHash = ComputeMiniHash(); + if (newMiniHash != vai->minihash) { + MarkUnreliable(vai); + DecodeVerts(); + goto rotateVBO; + } } if (vai->vbo == 0) { @@ -694,6 +743,7 @@ void TransformDrawEngineDX9::DoFlush() { vai->numVerts = indexGen.VertexCount(); vai->prim = indexGen.Prim(); vai->maxIndex = indexGen.MaxIndex(); + vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0; useElements = !indexGen.SeenOnlyPurePrims(); if (!useElements && indexGen.PureCount()) { vai->numVerts = indexGen.PureCount(); @@ -702,8 +752,8 @@ void TransformDrawEngineDX9::DoFlush() { if (1) { void * pVb; u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); - pD3Ddevice->CreateVertexBuffer(size, NULL, NULL, D3DPOOL_DEFAULT, &vai->vbo, NULL); - vai->vbo->Lock(0, size, &pVb, D3DLOCK_NOOVERWRITE ); + pD3Ddevice->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vai->vbo, NULL); + vai->vbo->Lock(0, size, &pVb, 0); memcpy(pVb, decoded, size); vai->vbo->Unlock(); } @@ -711,8 +761,8 @@ void TransformDrawEngineDX9::DoFlush() { if (useElements) { void * pIb; u32 size = sizeof(short) * indexGen.VertexCount(); - pD3Ddevice->CreateIndexBuffer(size, NULL, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &vai->ebo, NULL); - vai->ebo->Lock(0, size, &pIb, D3DLOCK_NOOVERWRITE ); + pD3Ddevice->CreateIndexBuffer(size, D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &vai->ebo, NULL); + vai->ebo->Lock(0, size, &pIb, 0); memcpy(pIb, decIndex, size); vai->ebo->Unlock(); } else { @@ -802,7 +852,7 @@ rotateVBO: if (useElements) { pD3Ddevice->SetIndices(ib_); - pD3Ddevice->DrawIndexedPrimitive(glprim[prim], 0, 0, 0, 0, D3DPrimCount(glprim[prim], vertexCount)); + pD3Ddevice->DrawIndexedPrimitive(glprim[prim], 0, 0, vertexCount, 0, D3DPrimCount(glprim[prim], vertexCount)); } else { pD3Ddevice->DrawPrimitive(glprim[prim], 0, D3DPrimCount(glprim[prim], vertexCount)); } @@ -895,54 +945,16 @@ void TransformDrawEngineDX9::Resized() { } decoderMap_.clear(); - // ... + if (g_Config.bPrescaleUV && !uvScale) { + uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS]; + } else if (!g_Config.bPrescaleUV && uvScale) { + delete uvScale; + uvScale = 0; + } } -bool TransformDrawEngineDX9::TestBoundingBox(void* control_points, int vertexCount, u32 vertType) { - // Simplify away bones and morph before proceeding - - /* - SimpleVertex *corners = (SimpleVertex *)(decoded + 65536 * 12); - u8 *temp_buffer = decoded + 65536 * 24; - - u32 origVertType = vertType; - vertType = NormalizeVertices((u8 *)corners, temp_buffer, (u8 *)control_points, 0, vertexCount, vertType); - - for (int cube = 0; cube < vertexCount / 8; cube++) { - // For each cube... - - for (int i = 0; i < 8; i++) { - const SimpleVertex &vert = corners[cube * 8 + i]; - - // To world space... - float worldPos[3]; - Vec3ByMatrix43(worldPos, (float *)&vert.pos.x, gstate.worldMatrix); - - // To view space... - float viewPos[3]; - Vec3ByMatrix43(viewPos, worldPos, gstate.viewMatrix); - - // And finally to screen space. - float frustumPos[4]; - Vec3ByMatrix44(frustumPos, viewPos, gstate.projMatrix); - - // Project to 2D - float x = frustumPos[0] / frustumPos[3]; - float y = frustumPos[1] / frustumPos[3]; - - // Rescale 2d position - // ... - } - } - */ - - - // Let's think. A better approach might be to take the edges of the drawing region and the projection - // matrix to build a frustum pyramid, and then clip the cube against those planes. If all vertices fail the same test, - // the cube is out. Otherwise it's in. - // TODO.... - - return true; +bool TransformDrawEngineDX9::IsCodePtrVertexDecoder(const u8 *ptr) const { + return decJitCache_->IsInSpace(ptr); } // TODO: Probably move this to common code (with normalization?) @@ -972,10 +984,6 @@ static Vec3f ScreenToDrawing(const Vec3f& coords) { return ret; } -bool TransformDrawEngineDX9::IsCodePtrVertexDecoder(const u8 *ptr) const { - return decJitCache_->IsInSpace(ptr); -} - // TODO: This probably is not the best interface. bool TransformDrawEngineDX9::GetCurrentSimpleVertices(int count, std::vector &vertices, std::vector &indices) { // This is always for the current vertices. diff --git a/GPU/Directx9/TransformPipelineDX9.h b/GPU/Directx9/TransformPipelineDX9.h index e643d14347..5be4f6c33b 100644 --- a/GPU/Directx9/TransformPipelineDX9.h +++ b/GPU/Directx9/TransformPipelineDX9.h @@ -82,6 +82,7 @@ public: }; u32 hash; + u32 minihash; Status status; @@ -111,7 +112,6 @@ public: void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType); void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); - bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType); bool GetCurrentSimpleVertices(int count, std::vector &vertices, std::vector &indices); @@ -147,6 +147,10 @@ public: DoFlush(); } +protected: + // Preprocessing for spline/bezier + virtual u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType) override; + private: void DecodeVerts(); void DecodeVertsStep(); @@ -160,11 +164,10 @@ private: // Preprocessing for spline/bezier u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType); - u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); - // drawcall ID - u32 ComputeFastDCID(); + u32 ComputeMiniHash(); u32 ComputeHash(); // Reads deferred vertex data. + void MarkUnreliable(VertexArrayInfoDX9 *vai); VertexDecoder *GetVertexDecoder(u32 vtype); diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index c6d089adc5..1538d03fa4 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -547,8 +547,8 @@ void TransformDrawEngine::DecimateTrackedVertexArrays() { return; } - int threshold = gpuStats.numFlips - VAI_KILL_AGE; - int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE; + const int threshold = gpuStats.numFlips - VAI_KILL_AGE; + const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE; int unreliableLeft = VAI_UNRELIABLE_KILL_MAX; for (auto iter = vai_.begin(); iter != vai_.end(); ) { bool kill; @@ -935,92 +935,6 @@ void TransformDrawEngine::Resized() { } } -struct Plane { - float x, y, z, w; - void Set(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } - float Test(float f[3]) const { return x * f[0] + y * f[1] + z * f[2] + w; } -}; - -static void PlanesFromMatrix(float mtx[16], Plane planes[6]) { - planes[0].Set(mtx[3]-mtx[0], mtx[7]-mtx[4], mtx[11]-mtx[8], mtx[15]-mtx[12]); // Right - planes[1].Set(mtx[3]+mtx[0], mtx[7]+mtx[4], mtx[11]+mtx[8], mtx[15]+mtx[12]); // Left - planes[2].Set(mtx[3]+mtx[1], mtx[7]+mtx[5], mtx[11]+mtx[9], mtx[15]+mtx[13]); // Bottom - planes[3].Set(mtx[3]-mtx[1], mtx[7]-mtx[5], mtx[11]-mtx[9], mtx[15]-mtx[13]); // Top - planes[4].Set(mtx[3]+mtx[2], mtx[7]+mtx[6], mtx[11]+mtx[10], mtx[15]+mtx[14]); // Near - planes[5].Set(mtx[3]-mtx[2], mtx[7]-mtx[6], mtx[11]-mtx[10], mtx[15]-mtx[14]); // Far -} - -// This code is HIGHLY unoptimized! -// -// It does the simplest and safest test possible: If all points of a bbox is outside a single of -// our clipping planes, we reject the box. Tighter bounds would be desirable but would take more calculations. -bool TransformDrawEngine::TestBoundingBox(void* control_points, int vertexCount, u32 vertType) { - SimpleVertex *corners = (SimpleVertex *)(decoded + 65536 * 12); - float *verts = (float *)(decoded + 65536 * 18); - - // Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder - // and a large vertex format. - if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_FLOAT) { - // memcpy(verts, control_points, 12 * vertexCount); - verts = (float *)control_points; - } else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_8BIT) { - const s8 *vtx = (const s8 *)control_points; - for (int i = 0; i < vertexCount * 3; i++) { - verts[i] = vtx[i] * (1.0f / 128.0f); - } - } else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT) { - const s16 *vtx = (const s16*)control_points; - for (int i = 0; i < vertexCount * 3; i++) { - verts[i] = vtx[i] * (1.0f / 32768.0f); - } - } else { - // Simplify away bones and morph before proceeding - u8 *temp_buffer = decoded + 65536 * 24; - NormalizeVertices((u8 *)corners, temp_buffer, (u8 *)control_points, 0, vertexCount, vertType); - // Special case for float positions only. - const float *ctrl = (const float *)control_points; - for (int i = 0; i < vertexCount; i++) { - verts[i * 3] = corners[i].pos.x; - verts[i * 3 + 1] = corners[i].pos.y; - verts[i * 3 + 2] = corners[i].pos.z; - } - } - - Plane planes[6]; - - float world[16]; - float view[16]; - float worldview[16]; - float worldviewproj[16]; - ConvertMatrix4x3To4x4(world, gstate.worldMatrix); - ConvertMatrix4x3To4x4(view, gstate.viewMatrix); - Matrix4ByMatrix4(worldview, world, view); - Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix); - PlanesFromMatrix(worldviewproj, planes); - for (int plane = 0; plane < 6; plane++) { - int inside = 0; - int out = 0; - for (int i = 0; i < vertexCount; i++) { - // Here we can test against the frustum planes! - float value = planes[plane].Test(verts + i * 3); - if (value < 0) - out++; - else - inside++; - } - - if (inside == 0) { - // All out - return false; - } - - // Any out. For testing that the planes are in the right locations. - // if (out != 0) return false; - } - - return true; -} - bool TransformDrawEngine::IsCodePtrVertexDecoder(const u8 *ptr) const { return decJitCache_->IsInSpace(ptr); } diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 1f899ef5bd..f1a1ede6c4 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -108,7 +108,6 @@ public: void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType); void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); - bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType); bool GetCurrentSimpleVertices(int count, std::vector &vertices, std::vector &indices); @@ -177,6 +176,10 @@ public: // Really just for convenience to share with softgpu. static u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType); +protected: + // Preprocessing for spline/bezier + virtual u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType) override; + private: void DecodeVerts(); void DecodeVertsStep(); @@ -190,9 +193,6 @@ private: GLuint AllocateBuffer(); void FreeBuffer(GLuint buf); - // Preprocessing for spline/bezier - u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); - u32 ComputeMiniHash(); u32 ComputeHash(); // Reads deferred vertex data. void MarkUnreliable(VertexArrayInfo *vai); @@ -222,10 +222,6 @@ private: VertexDecoder *dec_; VertexDecoderJitCache *decJitCache_; u32 lastVType_; - - // Vertex collector buffers - u8 *decoded; - u16 *decIndex; TransformedVertex *transformed; TransformedVertex *transformedExpanded;