diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index a18a483ae4..a71eb502d3 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -51,8 +51,8 @@ public: // Same for SubmitPrim virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) = 0; - void SubmitSpline(const void *control_points, const void *indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType); - void SubmitBezier(const void *control_points, const void *indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); + void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, u32 vertType); + void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); protected: // Preprocessing for spline/bezier diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index a15c83e47f..7fade324e6 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -197,9 +197,7 @@ void SoftwareTransform( vert.u *= uscale; vert.v *= vscale; - } - else - { + } else { vert.u = 0.0f; vert.v = 0.0f; } diff --git a/GPU/Common/SplineCommon.cpp b/GPU/Common/SplineCommon.cpp index e095bffa05..4b9cca11c7 100644 --- a/GPU/Common/SplineCommon.cpp +++ b/GPU/Common/SplineCommon.cpp @@ -33,7 +33,6 @@ #include "GPU/Common/SplineCommon.h" #include "GPU/Common/DrawEngineCommon.h" #include "GPU/ge_constants.h" -#include "GPU/GPUState.h" #if defined(_M_SSE) #include @@ -309,8 +308,8 @@ static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const Sp spline_knot(spatch.count_v - 1, spatch.type_v, knot_v); // Increase tesselation based on the size. Should be approximately right? - int patch_div_s = (spatch.count_u - 3) * gstate.getPatchDivisionU(); - int patch_div_t = (spatch.count_v - 3) * gstate.getPatchDivisionV(); + int patch_div_s = (spatch.count_u - 3) * spatch.tess_u; + int patch_div_t = (spatch.count_v - 3) * spatch.tess_v; if (quality > 1) { patch_div_s /= quality; patch_div_t /= quality; @@ -333,7 +332,7 @@ static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const Sp // int max_idx = spatch.count_u * spatch.count_v; - bool computeNormals = gstate.isLightingEnabled(); + bool computeNormals = spatch.computeNormals; float one_over_patch_div_s = 1.0f / (float)(patch_div_s); float one_over_patch_div_t = 1.0f / (float)(patch_div_t); @@ -753,7 +752,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, const GEPrimitiveType primType[] = { GE_PRIM_TRIANGLES, GE_PRIM_LINES, GE_PRIM_POINTS }; -void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType) { +void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, u32 vertType) { PROFILE_THIS_SCOPE("spline"); DispatchFlush(); @@ -799,11 +798,14 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi u8 *dest = splineBuffer; SplinePatchLocal patch; + patch.tess_u = tess_u; + patch.tess_v = tess_v; patch.type_u = type_u; patch.type_v = type_v; patch.count_u = count_u; patch.count_v = count_v; patch.points = points; + patch.computeNormals = computeNormals; int maxVertexCount = SPLINE_BUFFER_SIZE / vertexSize; TesselateSplinePatch(dest, quadIndices_, count, patch, origVertType, maxVertexCount); @@ -832,7 +834,7 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi } } -void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType) { +void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType) { PROFILE_THIS_SCOPE("bezier"); DispatchFlush(); @@ -892,8 +894,6 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi // like the splines, so we subdivide across the whole "mega-patch". if (num_patches_u == 0) num_patches_u = 1; if (num_patches_v == 0) num_patches_v = 1; - int tess_u = gstate.getPatchDivisionU(); - int tess_v = gstate.getPatchDivisionV(); if (tess_u < 4) tess_u = 4; if (tess_v < 4) tess_v = 4; diff --git a/GPU/Common/SplineCommon.h b/GPU/Common/SplineCommon.h index 0ab10fd6b7..428f617fb5 100644 --- a/GPU/Common/SplineCommon.h +++ b/GPU/Common/SplineCommon.h @@ -109,10 +109,13 @@ struct BezierPatch { struct SplinePatchLocal { SimpleVertex **points; + int tess_u; + int tess_v; int count_u; int count_v; int type_u; int type_v; + bool computeNormals; }; enum quality { diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 13b73936b4..458755230f 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -832,7 +832,7 @@ void DIRECTX9_GPU::Execute_Bezier(u32 op, u32 diff) { GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); int bz_ucount = op & 0xFF; int bz_vcount = (op >> 8) & 0xFF; - transformDraw_.SubmitBezier(control_points, indices, bz_ucount, bz_vcount, patchPrim, gstate.vertType); + transformDraw_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, gstate.vertType); } void DIRECTX9_GPU::Execute_Spline(u32 op, u32 diff) { @@ -875,7 +875,8 @@ void DIRECTX9_GPU::Execute_Spline(u32 op, u32 diff) { int sp_utype = (op >> 16) & 0x3; int sp_vtype = (op >> 18) & 0x3; GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); - transformDraw_.SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, gstate.vertType); + bool computeNormals = gstate.isLightingEnabled(); + transformDraw_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, gstate.vertType); } void DIRECTX9_GPU::Execute_ViewportType(u32 op, u32 diff) { @@ -1016,7 +1017,7 @@ void DIRECTX9_GPU::Execute_TexLevel(u32 op, u32 diff) { void DIRECTX9_GPU::Execute_LoadClut(u32 op, u32 diff) { gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; - textureCache_.LoadClut(); + textureCache_.LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); // This could be used to "dirty" textures with clut. } diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index a89447a1cf..0d3e2f1b6f 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -822,15 +822,14 @@ inline bool TextureCacheDX9::TexCacheEntry::Matches(u16 dim2, u8 format2, int ma return dim == dim2 && format == format2 && maxLevel == maxLevel2; } -void TextureCacheDX9::LoadClut() { - u32 clutAddr = gstate.getClutAddress(); - clutTotalBytes_ = gstate.getClutLoadBytes(); +void TextureCacheDX9::LoadClut(u32 clutAddr, u32 loadBytes) { + clutTotalBytes_ = loadBytes; if (Memory::IsValidAddress(clutAddr)) { // It's possible for a game to (successfully) access outside valid memory. - u32 bytes = Memory::ValidSize(clutAddr, clutTotalBytes_); + u32 bytes = Memory::ValidSize(clutAddr, loadBytes); #ifdef _M_SSE int numBlocks = bytes / 16; - if (bytes == clutTotalBytes_) { + if (bytes == loadBytes) { const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr); __m128i *dest = (__m128i *)clutBufRaw_; for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) { @@ -841,27 +840,25 @@ void TextureCacheDX9::LoadClut() { } } else { Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); - if (bytes < clutTotalBytes_) { - memset(clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes); + if (bytes < loadBytes) { + memset(clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } } #else Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); if (bytes < clutTotalBytes_) { - memset(clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes); + memset(clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } #endif } else { - memset(clutBufRaw_, 0x00, clutTotalBytes_); + memset(clutBufRaw_, 0x00, loadBytes); } // Reload the clut next time. clutLastFormat_ = 0xFFFFFFFF; - clutMaxBytes_ = std::max(clutMaxBytes_, clutTotalBytes_); + clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes); } -void TextureCacheDX9::UpdateCurrentClut() { - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - const u32 clutBase = gstate.getClutIndexStartPos(); +void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) { const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16)); // Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier. // If not, we're going to hash random data, which hopefully doesn't cause a performance issue. @@ -878,7 +875,7 @@ void TextureCacheDX9::UpdateCurrentClut() { // Special optimization: fonts typically draw clut4 with just alpha values in a single color. clutAlphaLinear_ = false; clutAlphaLinearColor_ = 0; - if (gstate.getClutPaletteFormat() == GE_CMODE_16BIT_ABGR4444 && gstate.isClutIndexSimple()) { + if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) { const u16_le *clut = GetCurrentClut(); clutAlphaLinear_ = true; clutAlphaLinearColor_ = clut[15] & 0x0FFF; @@ -1091,7 +1088,7 @@ void TextureCacheDX9::SetTexture(bool force) { if (hasClut) { if (clutLastFormat_ != gstate.clutformat) { // We update here because the clut format can be specified after the load. - UpdateCurrentClut(); + UpdateCurrentClut(gstate.getClutPaletteFormat(), gstate.getClutIndexStartPos(), gstate.isClutIndexSimple()); } cluthash = GetCurrentClutHash() ^ gstate.clutformat; cachekey ^= cluthash; @@ -1309,7 +1306,7 @@ void TextureCacheDX9::SetTexture(bool force) { entry->maxLevel = maxLevel; entry->lodBias = 0.0f; - entry->dim = gstate.getTextureDimension(0); + entry->dim = dim; entry->bufw = bufw; // This would overestimate the size in many case so we underestimate instead diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index 0948573dea..25cb5d82b7 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -61,7 +61,7 @@ public: void Invalidate(u32 addr, int size, GPUInvalidationType type); void InvalidateAll(GPUInvalidationType type); void ClearNextFrame(); - void LoadClut(); + void LoadClut(u32 clutAddr, u32 loadBytes); // FramebufferManager keeps TextureCache updated about what regions of memory // are being rendered to. This is barebones so far. @@ -177,7 +177,7 @@ private: template const T *GetCurrentClut(); u32 GetCurrentClutHash(); - void UpdateCurrentClut(); + void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple); bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0); void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 9cfacb7e58..48bcb449db 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -917,7 +917,7 @@ void GLES_GPU::Execute_Bezier(u32 op, u32 diff) { GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); int bz_ucount = op & 0xFF; int bz_vcount = (op >> 8) & 0xFF; - transformDraw_.SubmitBezier(control_points, indices, bz_ucount, bz_vcount, patchPrim, gstate.vertType); + transformDraw_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(),bz_ucount, bz_vcount, patchPrim, gstate.vertType); } void GLES_GPU::Execute_Spline(u32 op, u32 diff) { @@ -960,7 +960,8 @@ void GLES_GPU::Execute_Spline(u32 op, u32 diff) { int sp_utype = (op >> 16) & 0x3; int sp_vtype = (op >> 18) & 0x3; GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); - transformDraw_.SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, gstate.vertType); + bool computeNormals = gstate.isLightingEnabled(); + transformDraw_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, gstate.vertType); } void GLES_GPU::Execute_BoundingBox(u32 op, u32 diff) { @@ -1093,7 +1094,7 @@ void GLES_GPU::Execute_TexLevel(u32 op, u32 diff) { void GLES_GPU::Execute_LoadClut(u32 op, u32 diff) { gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; - textureCache_.LoadClut(); + textureCache_.LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); // This could be used to "dirty" textures with clut. } diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index e82a104d27..6c32f3eb34 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -841,15 +841,14 @@ inline bool TextureCache::TexCacheEntry::Matches(u16 dim2, u8 format2, int maxLe return dim == dim2 && format == format2 && maxLevel == maxLevel2; } -void TextureCache::LoadClut() { - u32 clutAddr = gstate.getClutAddress(); - clutTotalBytes_ = gstate.getClutLoadBytes(); +void TextureCache::LoadClut(u32 clutAddr, u32 loadBytes) { + clutTotalBytes_ = loadBytes; if (Memory::IsValidAddress(clutAddr)) { // It's possible for a game to (successfully) access outside valid memory. - u32 bytes = Memory::ValidSize(clutAddr, clutTotalBytes_); + u32 bytes = Memory::ValidSize(clutAddr, loadBytes); #ifdef _M_SSE int numBlocks = bytes / 16; - if (bytes == clutTotalBytes_) { + if (bytes == loadBytes) { const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr); __m128i *dest = (__m128i *)clutBufRaw_; for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) { @@ -860,8 +859,8 @@ void TextureCache::LoadClut() { } } else { Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); - if (bytes < clutTotalBytes_) { - memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes); + if (bytes < loadBytes) { + memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } } #else @@ -871,16 +870,14 @@ void TextureCache::LoadClut() { } #endif } else { - memset(clutBufRaw_, 0x00, clutTotalBytes_); + memset(clutBufRaw_, 0x00, loadBytes); } // Reload the clut next time. clutLastFormat_ = 0xFFFFFFFF; - clutMaxBytes_ = std::max(clutMaxBytes_, clutTotalBytes_); + clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes); } -void TextureCache::UpdateCurrentClut() { - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - const u32 clutBase = gstate.getClutIndexStartPos(); +void TextureCache::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) { const u32 clutBaseBytes = clutFormat == GE_CMODE_32BIT_ABGR8888 ? (clutBase * sizeof(u32)) : (clutBase * sizeof(u16)); // Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier. // If not, we're going to hash random data, which hopefully doesn't cause a performance issue. @@ -905,7 +902,7 @@ void TextureCache::UpdateCurrentClut() { // Special optimization: fonts typically draw clut4 with just alpha values in a single color. clutAlphaLinear_ = false; clutAlphaLinearColor_ = 0; - if (gstate.getClutPaletteFormat() == GE_CMODE_16BIT_ABGR4444 && gstate.isClutIndexSimple()) { + if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) { const u16_le *clut = GetCurrentClut(); clutAlphaLinear_ = true; clutAlphaLinearColor_ = clut[15] & 0xFFF0; @@ -1156,7 +1153,7 @@ void TextureCache::SetTexture(bool force) { if (hasClut) { if (clutLastFormat_ != gstate.clutformat) { // We update here because the clut format can be specified after the load. - UpdateCurrentClut(); + UpdateCurrentClut(gstate.getClutPaletteFormat(), gstate.getClutIndexStartPos(), gstate.isClutIndexSimple()); } cluthash = GetCurrentClutHash() ^ gstate.clutformat; cachekey ^= cluthash; diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index e5d4d6a663..9b7a9593ac 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -67,7 +67,7 @@ public: void Invalidate(u32 addr, int size, GPUInvalidationType type); void InvalidateAll(GPUInvalidationType type); void ClearNextFrame(); - void LoadClut(); + void LoadClut(u32 clutAddr, u32 loadBytes); // FramebufferManager keeps TextureCache updated about what regions of memory // are being rendered to. This is barebones so far. @@ -189,7 +189,7 @@ private: template const T *GetCurrentClut(); u32 GetCurrentClutHash(); - void UpdateCurrentClut(); + void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple); bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0); void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer);