From f061eadc046b8806a8158b644de2c73742d4f3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 25 Jul 2022 18:51:08 +0200 Subject: [PATCH 01/16] Initial implementation of 3D texturing through equal-size mips (see #6357) Vulkan-only currently, though all the other backends except ES 2.0 without GL_OES_texture_3d can support it with some work. --- Common/GPU/Vulkan/VulkanImage.cpp | 12 ++++--- Common/GPU/Vulkan/VulkanImage.h | 15 +++++---- Common/GPU/Vulkan/thin3d_vulkan.cpp | 8 ++--- GPU/Common/FragmentShaderGenerator.cpp | 20 +++++++++--- GPU/Common/ShaderId.cpp | 3 +- GPU/Common/ShaderId.h | 1 + GPU/Common/ShaderUniforms.cpp | 2 ++ GPU/Common/ShaderUniforms.h | 4 +-- GPU/Common/TextureCacheCommon.cpp | 44 ++++++++++++++++++++++++-- GPU/Common/TextureCacheCommon.h | 7 +++- GPU/GPUCommon.cpp | 6 ++++ GPU/GPUState.h | 7 ++++ GPU/Vulkan/DepalettizeShaderVulkan.cpp | 4 +-- GPU/Vulkan/TextureCacheVulkan.cpp | 26 ++++++++++----- 14 files changed, 122 insertions(+), 37 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanImage.cpp b/Common/GPU/Vulkan/VulkanImage.cpp index 8df9696705..c013f5d9ee 100644 --- a/Common/GPU/Vulkan/VulkanImage.cpp +++ b/Common/GPU/Vulkan/VulkanImage.cpp @@ -31,7 +31,7 @@ static bool IsDepthStencilFormat(VkFormat format) { } } -bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage, const VkComponentMapping *mapping) { +bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage, const VkComponentMapping *mapping) { if (w == 0 || h == 0 || numMips == 0) { ERROR_LOG(G3D, "Can't create a zero-size VulkanTexture"); return false; @@ -41,17 +41,18 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int numMips, width_ = w; height_ = h; + depth_ = depth; numMips_ = numMips; format_ = format; VkImageAspectFlags 
aspect = IsDepthStencilFormat(format) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; VkImageCreateInfo image_create_info{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; - image_create_info.imageType = VK_IMAGE_TYPE_2D; + image_create_info.imageType = depth > 1 ? VK_IMAGE_TYPE_3D : VK_IMAGE_TYPE_2D; image_create_info.format = format_; image_create_info.extent.width = width_; image_create_info.extent.height = height_; - image_create_info.extent.depth = 1; + image_create_info.extent.depth = depth; image_create_info.mipLevels = numMips; image_create_info.arrayLayers = 1; image_create_info.samples = VK_SAMPLE_COUNT_1_BIT; @@ -98,7 +99,7 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int numMips, // Create the view while we're at it. VkImageViewCreateInfo view_info{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; view_info.image = image_; - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.viewType = depth > 1 ? VK_IMAGE_VIEW_TYPE_3D : VK_IMAGE_VIEW_TYPE_2D; view_info.format = format_; if (mapping) { view_info.components = *mapping; @@ -122,11 +123,12 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int numMips, } // TODO: Batch these. 
-void VulkanTexture::UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength) { +void VulkanTexture::UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, int depthLayer, VkBuffer buffer, uint32_t offset, size_t rowLength) { VkBufferImageCopy copy_region{}; copy_region.bufferOffset = offset; copy_region.bufferRowLength = (uint32_t)rowLength; copy_region.bufferImageHeight = 0; // 2D + copy_region.imageOffset.z = depthLayer; copy_region.imageExtent.width = mipWidth; copy_region.imageExtent.height = mipHeight; copy_region.imageExtent.depth = 1; diff --git a/Common/GPU/Vulkan/VulkanImage.h b/Common/GPU/Vulkan/VulkanImage.h index 75f09a89ae..50d7e0c7f5 100644 --- a/Common/GPU/Vulkan/VulkanImage.h +++ b/Common/GPU/Vulkan/VulkanImage.h @@ -22,9 +22,11 @@ public: // Fast uploads from buffer. Mipmaps supported. // Usage must at least include VK_IMAGE_USAGE_TRANSFER_DST_BIT in order to use UploadMip. // When using UploadMip, initialLayout should be VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL. - bool CreateDirect(VkCommandBuffer cmd, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr); + bool CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr); void ClearMip(VkCommandBuffer cmd, int mip, uint32_t value); - void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels + + // Can also be used to copy individual levels of a 3D texture. 
+ void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, int depthLayer, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels void GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bool fromCompute); void EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkPipelineStageFlags prevStage, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); @@ -62,10 +64,11 @@ private: VkImageView view_ = VK_NULL_HANDLE; VmaAllocation allocation_ = VK_NULL_HANDLE; - int32_t width_ = 0; - int32_t height_ = 0; - int32_t numMips_ = 1; + int16_t width_ = 0; + int16_t height_ = 0; + int16_t numMips_ = 1; + int16_t depth_ = 1; + VkFormat format_ = VK_FORMAT_UNDEFINED; - size_t offset_ = 0; std::string tag_; }; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index d4276192cf..1529f4626b 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -640,7 +640,7 @@ VulkanTexture *VKContext::GetNullTexture() { nullTexture_->SetTag("Null"); int w = 8; int h = 8; - nullTexture_->CreateDirect(cmdInit, w, h, 1, VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + nullTexture_->CreateDirect(cmdInit, w, h, 1, 1, VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); uint32_t bindOffset; VkBuffer bindBuf; @@ -651,7 +651,7 @@ VulkanTexture *VKContext::GetNullTexture() { data[y*w + x] = 0; // black } } - nullTexture_->UploadMip(cmdInit, 0, w, h, bindBuf, bindOffset, w); + nullTexture_->UploadMip(cmdInit, 0, w, h, 0, bindBuf, bindOffset, w); nullTexture_->EndCreate(cmdInit, false, VK_PIPELINE_STAGE_TRANSFER_BIT); } else { nullTexture_->Touch(); @@ -733,7 +733,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur usageBits |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } - if (!vkTex_->CreateDirect(cmd, width_, height_, mipLevels_, vulkanFormat, 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) { + if (!vkTex_->CreateDirect(cmd, width_, height_, 1, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) { ERROR_LOG(G3D, "Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_); return false; } @@ -755,7 +755,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur } else { offset = push->PushAligned((const void *)desc.initData[i], size, 16, &buf); } - vkTex_->UploadMip(cmd, i, w, h, buf, offset, w); + vkTex_->UploadMip(cmd, i, w, h, 0, buf, offset, w); w = (w + 1) / 2; h = (h + 1) / 2; d = (d + 1) / 2; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index df12e041d4..285d813531 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -78,6 +78,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); + bool texture3D = id.Bit(FS_BIT_3D_TEXTURE); bool flatBug = bugs.Has(Draw::Bugs::BROKEN_FLAT_IN_SHADER) && g_Config.bVendorBugChecksEnabled; @@ -136,7 +137,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr); if (doTexture) { - WRITE(p, "layout (binding = 0) uniform sampler2D tex;\n"); + WRITE(p, "layout (binding = 0) uniform %s tex;\n", texture3D ? "sampler3D" : "sampler2D"); } if (readFramebufferTex) { @@ -558,10 +559,19 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? 
".bgra" : ""); } } else { - if (doTextureProjection) { - WRITE(p, " vec4 t = %sProj(tex, %s);\n", compat.texture, texcoord); + if (texture3D) { + WRITE(p, " float bias = pow(u_mipBias, 1.0);\n"); + if (doTextureProjection) { + WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, bias, %s.z));\n", compat.texture, texcoord, texcoord); + } else { + WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, bias));\n", compat.texture, texcoord); + } } else { - WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); + if (doTextureProjection) { + WRITE(p, " vec4 t = %sProj(tex, %s);\n", compat.texture, texcoord); + } else { + WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); + } } } } else { @@ -572,7 +582,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord); } - WRITE(p, " vec2 tsize = vec2(textureSize(tex, 0));\n"); + WRITE(p, " vec2 tsize = textureSize(tex, 0).xy;\n"); WRITE(p, " vec2 fraction;\n"); WRITE(p, " bool bilinear = (u_depal_mask_shift_off_fmt >> 31) != 0U;\n"); WRITE(p, " if (bilinear) {\n"); diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 2a7441a92f..12a5996b5d 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -170,7 +170,7 @@ std::string FragmentShaderDesc(const FShaderID &id) { std::stringstream desc; desc << StringFromFormat("%08x:%08x ", id.d[1], id.d[0]); if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear "; - if (id.Bit(FS_BIT_DO_TEXTURE)) desc << "Tex "; + if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? 
"Tex3D " : "Tex "); if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj "; if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha "; if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs "; @@ -289,6 +289,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { } id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture); id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal); + id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D); } id.SetBit(FS_BIT_LMODE, lmode); diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 3c1d75226f..8dcee32c1e 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -93,6 +93,7 @@ enum FShaderBit : uint8_t { FS_BIT_TEST_DISCARD_TO_ZERO = 48, FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, FS_BIT_COLOR_WRITEMASK = 50, + FS_BIT_3D_TEXTURE = 51, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index b07d4d1fdc..0fe8ce807a 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -104,6 +104,8 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->texClamp[3] = invH * 0.5f; ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH; + + ub->mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); } if (dirtyUniforms & DIRTY_PROJMATRIX) { diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 1195af4547..691835a262 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -34,7 +34,7 @@ struct UB_VS_FS_Base { float cullRangeMin[4]; float cullRangeMax[4]; uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one. 
- uint32_t colorWriteMask; int pad3; + uint32_t colorWriteMask; float mipBias; // Fragment data float fogColor[4]; float texEnvColor[4]; @@ -63,7 +63,7 @@ R"( mat4 u_proj; uint u_spline_counts; uint u_depal_mask_shift_off_fmt; uint u_colorWriteMask; - int u_pad3; + float u_mipBias; vec3 u_fogcolor; vec3 u_texenv; ivec4 u_alphacolorref; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 74a63fcf97..bd214bbb4b 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -481,7 +481,8 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { if (match && (entry->status & TexCacheEntry::STATUS_TO_REPLACE) && replacementTimeThisFrame_ < replacementFrameBudget_) { int w0 = gstate.getTextureWidth(0); int h0 = gstate.getTextureHeight(0); - ReplacedTexture &replaced = FindReplacement(entry, w0, h0); + int d0 = 1; + ReplacedTexture &replaced = FindReplacement(entry, w0, h0, d0); if (replaced.Valid()) { match = false; reason = "replacing"; @@ -492,6 +493,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { // got one! gstate_c.curTextureWidth = w; gstate_c.curTextureHeight = h; + gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); if (rehash) { // Update in case any of these changed. entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8; @@ -597,6 +599,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { gstate_c.curTextureWidth = w; gstate_c.curTextureHeight = h; + gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); nextTexture_ = entry; if (nextFramebufferTexture_) { @@ -1132,6 +1135,13 @@ void TextureCacheCommon::NotifyVideoUpload(u32 addr, int size, int width, GEBuff } void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { + if (loadBytes == 0) { + // Don't accidentally overwrite clutTotalBytes_ with a zero. 
+ return; + } + + u32 startPos = gstate.getClutIndexStartPos(); + clutTotalBytes_ = loadBytes; clutRenderAddress_ = 0xFFFFFFFF; @@ -1288,7 +1298,12 @@ u32 TextureCacheCommon::EstimateTexMemoryUsage(const TexCacheEntry *entry) { return pixelSize << (dimW + dimH); } -ReplacedTexture &TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &w, int &h) { +ReplacedTexture &TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d) { + if (d != 1) { + // We don't yet support replacing 3D textures. + return replacer_.FindNone(); + } + // Short circuit the non-enabled case. // Otherwise, due to bReplaceTexturesAllowLate, we'll still spawn tasks looking for replacements // that then won't be used. @@ -1751,6 +1766,7 @@ void TextureCacheCommon::ApplyTexture() { ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR); nextFramebufferTexture_ = nullptr; } + gstate_c.SetTextureIs3D(false); return; } @@ -1805,6 +1821,7 @@ void TextureCacheCommon::ApplyTexture() { entry->lastFrame = gpuStats.numFlips; BindTexture(entry); gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); + gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); } void TextureCacheCommon::Clear(bool delete_them) { @@ -2055,6 +2072,7 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt } plan.scaleFactor = standardScaleFactor_; + plan.depth = 1; // Rachet down scale factor in low-memory mode. // TODO: I think really we should just turn it off? @@ -2064,7 +2082,23 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt } if (plan.badMipSizes) { + // Check for pure 3D texture. 
+ int tw = gstate.getTextureWidth(0); + int th = gstate.getTextureHeight(0); + bool pure3D = true; + for (int i = 0; i < plan.levelsToLoad; i++) { + if (gstate.getTextureWidth(i) != gstate.getTextureWidth(0) || gstate.getTextureHeight(i) != gstate.getTextureHeight(0)) { + pure3D = false; + } + } + + if (pure3D) { + plan.depth = plan.levelsToLoad; + plan.scaleFactor = 1; + } + plan.levelsToLoad = 1; + plan.levelsToCreate = 1; } if (plan.hardwareScaling) { @@ -2079,7 +2113,7 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt plan.w = gstate.getTextureWidth(0); plan.h = gstate.getTextureHeight(0); - plan.replaced = &FindReplacement(entry, plan.w, plan.h); + plan.replaced = &FindReplacement(entry, plan.w, plan.h, plan.depth); if (plan.replaced->Valid()) { // We're replacing, so we won't scale. plan.scaleFactor = 1; @@ -2138,6 +2172,10 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt entry->status &= ~TexCacheEntry::STATUS_NO_MIPS; } + if (plan.depth > 1) { + entry->status |= TexCacheEntry::STATUS_3D; + } + // Will be filled in again during decode. entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK; return true; diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 33430d6c97..5c8630bb81 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -135,6 +135,8 @@ struct TexCacheEntry { STATUS_FRAMEBUFFER_OVERLAP = 0x1000, STATUS_FORCE_REBUILD = 0x2000, + + STATUS_3D = 0x4000, }; // Status, but int so we can zero initialize. @@ -264,6 +266,9 @@ struct BuildTexturePlan { int w; int h; + // Used for 3D textures only. If not a 3D texture, will be 1. + int depth; + // The replacement for the texture. 
ReplacedTexture *replaced; }; @@ -327,7 +332,7 @@ protected: CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool expandTo32Bit); void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel); CheckAlphaResult ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool reverseColors, bool expandTo32Bit); - ReplacedTexture &FindReplacement(TexCacheEntry *entry, int &w, int &h); + ReplacedTexture &FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d); // Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory. void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, bool reverseColors); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index d47eb06881..eb5bf312e4 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1565,6 +1565,12 @@ void GPUCommon::Execute_TexLevel(u32 op, u32 diff) { Flush(); } gstate.texlevel ^= diff; + + if (diff & 0xFF0000) { + // Piggyback on this flag for 3D textures. + gstate_c.Dirty(DIRTY_TEXCLAMP); + } + gstate_c.Dirty(DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 9026cfc24f..4bd4dae890 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -551,6 +551,12 @@ struct GPUStateCache { Dirty(DIRTY_FRAGMENTSHADER_STATE); } } + void SetTextureIs3D(bool is3D) { + if (is3D != curTextureIs3D) { + curTextureIs3D = is3D; + Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_UVSCALEOFFSET); + } + } u32 featureFlags; @@ -580,6 +586,7 @@ struct GPUStateCache { // Only applied when needShaderTexClamp = true. 
u32 curTextureXOffset; u32 curTextureYOffset; + bool curTextureIs3D; float vpWidth; float vpHeight; diff --git a/GPU/Vulkan/DepalettizeShaderVulkan.cpp b/GPU/Vulkan/DepalettizeShaderVulkan.cpp index a72c2af038..52269387bf 100644 --- a/GPU/Vulkan/DepalettizeShaderVulkan.cpp +++ b/GPU/Vulkan/DepalettizeShaderVulkan.cpp @@ -168,7 +168,7 @@ VulkanTexture *DepalShaderCacheVulkan::GetClutTexture(GEPaletteFormat clutFormat VulkanTexture *vktex = new VulkanTexture(vulkan); vktex->SetTag("DepalClut"); VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::INIT_COMMANDBUFFER); - if (!vktex->CreateDirect(cmd, texturePixels, 1, 1, destFormat, + if (!vktex->CreateDirect(cmd, texturePixels, 1, 1, 1, destFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, &componentMapping)) { ERROR_LOG(G3D, "Failed to create texture for CLUT"); return nullptr; @@ -177,7 +177,7 @@ VulkanTexture *DepalShaderCacheVulkan::GetClutTexture(GEPaletteFormat clutFormat VkBuffer pushBuffer; uint32_t pushOffset = push_->PushAligned(rawClut, 1024, 4, &pushBuffer); - vktex->UploadMip(cmd, 0, texturePixels, 1, pushBuffer, pushOffset, texturePixels); + vktex->UploadMip(cmd, 0, texturePixels, 1, 0, pushBuffer, pushOffset, texturePixels); vktex->EndCreate(cmd, false, VK_PIPELINE_STAGE_TRANSFER_BIT); DepalTextureVulkan *tex = new DepalTextureVulkan(); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index f396c57769..1b52ec5689 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -652,7 +652,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { snprintf(texName, sizeof(texName), "tex_%08x_%s", entry->addr, GeTextureFormatToString((GETextureFormat)entry->format, gstate.getClutPaletteFormat())); image->SetTag(texName); - bool allocSuccess = image->CreateDirect(cmdInit, plan.w * plan.scaleFactor, plan.h * plan.scaleFactor, plan.levelsToCreate, 
actualFmt, imageLayout, usage, mapping); + bool allocSuccess = image->CreateDirect(cmdInit, plan.w * plan.scaleFactor, plan.h * plan.scaleFactor, plan.depth, plan.levelsToCreate, actualFmt, imageLayout, usage, mapping); if (!allocSuccess && !lowMemoryMode_) { WARN_LOG_REPORT(G3D, "Texture cache ran out of GPU memory; switching to low memory mode"); lowMemoryMode_ = true; @@ -671,7 +671,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { plan.scaleFactor = 1; actualFmt = dstFmt; - allocSuccess = image->CreateDirect(cmdInit, plan.w * plan.scaleFactor, plan.h * plan.scaleFactor, plan.levelsToCreate, actualFmt, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, mapping); + allocSuccess = image->CreateDirect(cmdInit, plan.w * plan.scaleFactor, plan.h * plan.scaleFactor, plan.depth, plan.levelsToCreate, actualFmt, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, mapping); } if (!allocSuccess) { @@ -686,7 +686,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { ReplacedTextureDecodeInfo replacedInfo; bool willSaveTex = false; - if (replacer_.Enabled() && !plan.replaced->Valid()) { + if (replacer_.Enabled() && !plan.replaced->Valid() && plan.depth == 1) { replacedInfo.cachekey = entry->CacheKey(); replacedInfo.hash = entry->fullhash; replacedInfo.addr = entry->addr; @@ -700,8 +700,15 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "Texture Upload (%08x) video=%d", entry->addr, plan.isVideo); - // Upload the texture data. - for (int i = 0; i < plan.levelsToLoad; i++) { + // Upload the texture data. We simply reuse the same loop for 3D texture slices instead of mips, if we have those. 
+ int levels; + if (plan.depth > 1) { + levels = plan.depth; + } else { + levels = plan.levelsToLoad; + } + + for (int i = 0; i < levels; i++) { int mipUnscaledWidth = gstate.getTextureWidth(i); int mipUnscaledHeight = gstate.getTextureHeight(i); @@ -742,10 +749,13 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { replacementTimeThisFrame_ += time_now_d() - replaceStart; VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT, "Copy Upload (replaced): %dx%d", mipWidth, mipHeight); - entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); + entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, 0, texBuf, bufferOffset, stride / bpp); VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT); } else { - if (computeUpload) { + if (plan.depth != 1) { + loadLevel(size, i, stride, plan.scaleFactor); + entry->vkTex->UploadMip(cmdInit, 0, mipWidth, mipHeight, i, texBuf, bufferOffset, stride / bpp); + } else if (computeUpload) { int srcBpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2; int srcStride = mipUnscaledWidth * srcBpp; int srcSize = srcStride * mipUnscaledHeight; @@ -768,7 +778,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { loadLevel(size, i == 0 ? plan.baseLevelSrc : i, stride, plan.scaleFactor); VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT, "Copy Upload: %dx%d", mipWidth, mipHeight); - entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); + entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, 0, texBuf, bufferOffset, stride / bpp); VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT); } if (replacer_.Enabled()) { From 2cb9e09ad3edaa859fdc1796b69c4d4e26c2a1ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 25 Jul 2022 20:51:52 +0200 Subject: [PATCH 02/16] Implement new discovery of a larger CLUT than we thought, plus non-shared CLUTs for 8-bit indices. 
Not sure yet what limitations apply, regarding using it for even more 16-bit colors, etc... --- GPU/Common/FragmentShaderGenerator.cpp | 4 ++-- GPU/Common/TextureCacheCommon.cpp | 8 ++++++-- GPU/GPUState.h | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 285d813531..2a6296ab70 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -560,7 +560,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } else { if (texture3D) { - WRITE(p, " float bias = pow(u_mipBias, 1.0);\n"); + WRITE(p, " float bias = u_mipBias * 1.0;\n"); if (doTextureProjection) { WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, bias, %s.z));\n", compat.texture, texcoord, texcoord); } else { @@ -573,7 +573,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); } } - } + } } else { if (doTextureProjection) { // We don't use textureProj because we need better control and it's probably not much of a savings anyway. diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index bd214bbb4b..92605780a1 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1678,10 +1678,13 @@ CheckAlphaResult TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int l texptr = (u8 *)tmpTexBuf32_.data(); } + const bool mipmapShareClut = gstate.isClutSharedForMipmaps(); + const int clutSharingOffset = mipmapShareClut ? 
0 : (level & 1) * 256; + GEPaletteFormat palFormat = (GEPaletteFormat)gstate.getClutPaletteFormat(); - const u16 *clut16 = (const u16 *)clutBuf_; - const u32 *clut32 = (const u32 *)clutBuf_; + const u16 *clut16 = (const u16 *)clutBuf_ + clutSharingOffset; + const u32 *clut32 = (const u32 *)clutBuf_ + clutSharingOffset; if (expandTo32Bit && palFormat != GE_CMODE_32BIT_ABGR8888) { ConvertFormatToRGBA8888(GEPaletteFormat(palFormat), expandClut_, clut16, 256); @@ -1721,6 +1724,7 @@ CheckAlphaResult TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int l case GE_CMODE_32BIT_ABGR8888: { + switch (bytesPerIndex) { case 1: for (int y = 0; y < h; ++y) { diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 4bd4dae890..8675afd109 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -300,8 +300,8 @@ struct GPUgstate { bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx. int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; } u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); } - int getClutLoadBytes() const { return (loadclut & 0x3F) * 32; } - int getClutLoadBlocks() const { return (loadclut & 0x3F); } + int getClutLoadBytes() const { return (loadclut & 0x7F) * 32; } + int getClutLoadBlocks() const { return (loadclut & 0x7F); } GEPaletteFormat getClutPaletteFormat() const { return static_cast(clutformat & 3); } int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; } int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; } From d7aa3ee486399138b8421c1ce32f5da17f42deb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 25 Jul 2022 21:13:56 +0200 Subject: [PATCH 03/16] Fix the mip bias to take the texture depth and sampling offset into account. 
--- GPU/Common/FragmentShaderGenerator.cpp | 5 ++--- GPU/Common/ShaderUniforms.cpp | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 2a6296ab70..6d3886035e 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -560,11 +560,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } else { if (texture3D) { - WRITE(p, " float bias = u_mipBias * 1.0;\n"); if (doTextureProjection) { - WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, bias, %s.z));\n", compat.texture, texcoord, texcoord); + WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, u_mipBias, %s.z));\n", compat.texture, texcoord, texcoord); } else { - WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, bias));\n", compat.texture, texcoord); + WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, u_mipBias));\n", compat.texture, texcoord); } } else { if (doTextureProjection) { diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 0fe8ce807a..77308b91ee 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -105,7 +105,8 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH; - ub->mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); + float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); + ub->mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1); } if (dirtyUniforms & DIRTY_PROJMATRIX) { From 176b460d7648df45386c1155ba373ba87645a74e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 26 Jul 2022 10:43:30 +0200 Subject: [PATCH 04/16] Quick attempt at fixing the Macross glitch --- GPU/Common/TextureCacheCommon.cpp | 1 + GPU/Common/TextureCacheCommon.h | 1 + GPU/Vulkan/TextureCacheVulkan.cpp | 3 ++- 3 files 
changed, 4 insertions(+), 1 deletion(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 92605780a1..aa43ff87f0 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -162,6 +162,7 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac key.sClamp = gstate.isTexCoordClampedS(); key.tClamp = gstate.isTexCoordClampedT(); key.aniso = false; + key.texture3d = gstate_c.curTextureIs3D; GETexLevelMode mipMode = gstate.getTexLevelMode(); bool autoMip = mipMode == GE_TEXLEVEL_MODE_AUTO; diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 5c8630bb81..3bd49fb7f2 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -71,6 +71,7 @@ struct SamplerCacheKey { bool sClamp : 1; bool tClamp : 1; bool aniso : 1; + bool texture3d : 1; }; }; bool operator < (const SamplerCacheKey &other) const { diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 1b52ec5689..cf10aa30cb 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -119,7 +119,8 @@ VkSampler SamplerCache::GetOrCreateSampler(const SamplerCacheKey &key) { VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; samp.addressModeU = key.sClamp ? VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE : VK_SAMPLER_ADDRESS_MODE_REPEAT; samp.addressModeV = key.tClamp ? VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE : VK_SAMPLER_ADDRESS_MODE_REPEAT; - samp.addressModeW = samp.addressModeU; // irrelevant, but Mali recommends that all clamp modes are the same if possible. + // W addressing is irrelevant for 2d textures, but Mali recommends that all clamp modes are the same if possible so just copy from U. + samp.addressModeW = key.texture3d ? VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE : samp.addressModeU; samp.compareOp = VK_COMPARE_OP_ALWAYS; samp.flags = 0; samp.magFilter = key.magFilt ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; From e455d6bcfe508624da971990a3db40557cb3bd38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 26 Jul 2022 10:43:44 +0200 Subject: [PATCH 05/16] Some safety checks to avoid bad combinations, probably not really needed in practice. --- GPU/Common/FragmentShaderGenerator.cpp | 4 +++- GPU/Vulkan/TextureCacheVulkan.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 6d3886035e..5b03db8cf0 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -83,7 +83,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool flatBug = bugs.Has(Draw::Bugs::BROKEN_FLAT_IN_SHADER) && g_Config.bVendorBugChecksEnabled; bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; - bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL); + bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; @@ -559,6 +559,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : ""); } } else { + // Note that here we're relying on the filter to be linear. We would have to otherwise to do two samples and manually filter in Z. + // Let's add that if we run into a case... 
if (texture3D) { if (doTextureProjection) { WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, u_mipBias, %s.z));\n", compat.texture, texcoord, texcoord); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index cf10aa30cb..876ea81f56 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -414,7 +414,7 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS); bool depth = channel == NOTIFY_FB_DEPTH; - bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth; + bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D; bool need_depalettize = IsClutFormat(texFormat); From 0262fbc70a04ee88d8514b0599207de0e9716ff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 30 Jul 2022 19:55:56 +0200 Subject: [PATCH 06/16] Implement 3D textures for D3D11 --- GPU/Common/FragmentShaderGenerator.cpp | 20 ++++++-- GPU/Common/TextureCacheCommon.cpp | 4 -- GPU/D3D11/TextureCacheD3D11.cpp | 67 ++++++++++++++++++++------ GPU/D3D11/TextureCacheD3D11.h | 4 +- GPU/Vulkan/TextureCacheVulkan.cpp | 5 ++ 5 files changed, 74 insertions(+), 26 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 5b03db8cf0..0dca9af611 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -210,7 +210,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } else { WRITE(p, "SamplerState samp : register(s0);\n"); - WRITE(p, "Texture2D tex : register(t0);\n"); + if (texture3D) { + WRITE(p, "Texture3D tex : register(t0);\n"); + } else { + WRITE(p, "Texture2D tex : register(t0);\n"); + } if (readFramebufferTex) { // No sampler required, we Load WRITE(p, "Texture2D fboTex : register(t1);\n"); @@ -547,10 +551,18 @@ bool 
GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (!shaderDepal) { if (compat.shaderLanguage == HLSL_D3D11) { - if (doTextureProjection) { - WRITE(p, " vec4 t = tex.Sample(samp, v_texcoord.xy / v_texcoord.z)%s;\n", bgraTexture ? ".bgra" : ""); + if (texture3D) { + if (doTextureProjection) { + WRITE(p, " vec4 t = tex.Sample(samp, vec3(v_texcoord.xy / v_texcoord.z, u_mipBias))%s;\n", bgraTexture ? ".bgra" : ""); + } else { + WRITE(p, " vec4 t = tex.Sample(samp, vec3(%s.xy, u_mipBias))%s;\n", texcoord, bgraTexture ? ".bgra" : ""); + } } else { - WRITE(p, " vec4 t = tex.Sample(samp, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : ""); + if (doTextureProjection) { + WRITE(p, " vec4 t = tex.Sample(samp, v_texcoord.xy / v_texcoord.z)%s;\n", bgraTexture ? ".bgra" : ""); + } else { + WRITE(p, " vec4 t = tex.Sample(samp, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : ""); + } } } else if (compat.shaderLanguage == HLSL_D3D9) { if (doTextureProjection) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index aa43ff87f0..1c09fd6065 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -2177,10 +2177,6 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt entry->status &= ~TexCacheEntry::STATUS_NO_MIPS; } - if (plan.depth > 1) { - entry->status |= TexCacheEntry::STATUS_3D; - } - // Will be filled in again during decode. entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK; return true; diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index e2bb7ece02..9fef0f602e 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -467,24 +467,46 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) { } // We don't yet have mip generation, so clamp the number of levels to the ones we can load directly. 
- int levels = std::min(plan.levelsToCreate, plan.levelsToLoad); - - D3D11_TEXTURE2D_DESC desc{}; - desc.CPUAccessFlags = 0; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.ArraySize = 1; - desc.SampleDesc.Count = 1; - desc.Width = tw; - desc.Height = th; - desc.Format = dstFmt; - desc.MipLevels = levels; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + int levels; ID3D11ShaderResourceView *view; - ID3D11Texture2D *texture = DxTex(entry); + ID3D11Resource *texture = DxTex(entry); _assert_(texture == nullptr); - ASSERT_SUCCESS(device_->CreateTexture2D(&desc, nullptr, &texture)); + if (plan.depth == 1) { + ID3D11Texture2D *tex; + D3D11_TEXTURE2D_DESC desc{}; + desc.CPUAccessFlags = 0; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Width = tw; + desc.Height = th; + desc.Format = dstFmt; + desc.MipLevels = std::min(plan.levelsToCreate, plan.levelsToLoad); // No mip generation here, so only create the levels we actually load. + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + ASSERT_SUCCESS(device_->CreateTexture2D(&desc, nullptr, &tex)); + texture = tex; + + levels = std::min(plan.levelsToCreate, plan.levelsToLoad); + } else { + ID3D11Texture3D *tex; + D3D11_TEXTURE3D_DESC desc{}; + desc.CPUAccessFlags = 0; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.Width = tw; + desc.Height = th; + desc.Depth = plan.depth; + desc.Format = dstFmt; + desc.MipLevels = 1; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + ASSERT_SUCCESS(device_->CreateTexture3D(&desc, nullptr, &tex)); + texture = tex; + + levels = plan.depth; + } + ASSERT_SUCCESS(device_->CreateShaderResourceView(texture, nullptr, &view)); entry->texturePtr = texture; entry->textureView = view; @@ -525,11 +547,24 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) { LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, texFmt, false); - ID3D11Texture2D *texture = DxTex(entry); - context_->UpdateSubresource(texture, i, nullptr, data, stride, 
0); + } else { + D3D11_BOX box{}; + box.front = i; + box.back = i + 1; + box.right = w * plan.scaleFactor; + box.bottom = h * plan.scaleFactor; + context_->UpdateSubresource(texture, 0, &box, data, stride, 0); + } FreeAlignedMemory(data); } + // Signal that we support depth textures so use it as one. + if (plan.depth > 1) { + entry->status |= TexCacheEntry::STATUS_3D; + } + if (levels == 1) { entry->status |= TexCacheEntry::STATUS_NO_MIPS; } else { diff --git a/GPU/D3D11/TextureCacheD3D11.h b/GPU/D3D11/TextureCacheD3D11.h index 131398e2dc..6ababb2b30 100644 --- a/GPU/D3D11/TextureCacheD3D11.h +++ b/GPU/D3D11/TextureCacheD3D11.h @@ -77,8 +77,8 @@ private: ID3D11Device *device_; ID3D11DeviceContext *context_; - ID3D11Texture2D *&DxTex(TexCacheEntry *entry) { - return (ID3D11Texture2D *&)entry->texturePtr; + ID3D11Resource *&DxTex(TexCacheEntry *entry) { + return (ID3D11Resource *&)entry->texturePtr; } ID3D11ShaderResourceView *DxView(TexCacheEntry *entry) { return (ID3D11ShaderResourceView *)entry->textureView; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 876ea81f56..6b31a5a9e0 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -808,6 +808,11 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { entry->vkTex->EndCreate(cmdInit, false, prevStage, layout); VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + // Signal that we support depth textures so use it as one. 
+ if (plan.depth > 1) { + entry->status |= TexCacheEntry::STATUS_3D; + } + if (plan.replaced->Valid()) { entry->SetAlphaStatus(TexCacheEntry::TexStatus(plan.replaced->AlphaStatus())); } From f87b4cf2326e01baa3724301eff8e9f061f713f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 30 Jul 2022 20:41:31 +0200 Subject: [PATCH 07/16] Implement 3D textures for D3D9 as well --- GPU/Common/FragmentShaderGenerator.cpp | 17 +++++-- GPU/Common/FragmentShaderGenerator.h | 3 +- GPU/Directx9/ShaderManagerDX9.cpp | 5 ++ GPU/Directx9/TextureCacheDX9.cpp | 65 +++++++++++++++++++------- GPU/Directx9/TextureCacheDX9.h | 6 +-- 5 files changed, 72 insertions(+), 24 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 0dca9af611..6f3773c6c4 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -208,6 +208,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (enableFog) { WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); } + if (texture3D) { + WRITE(p, "float u_mipBias : register(c%i);\n", CONST_PS_MIPBIAS); + } } else { WRITE(p, "SamplerState samp : register(s0);\n"); if (texture3D) { @@ -565,10 +568,18 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } } else if (compat.shaderLanguage == HLSL_D3D9) { - if (doTextureProjection) { - WRITE(p, " vec4 t = tex2Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, 0, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : ""); + if (texture3D) { + if (doTextureProjection) { + WRITE(p, " vec4 t = tex3Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, u_mipBias, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : ""); + } else { + WRITE(p, " vec4 t = tex3D(tex, vec3(%s.x, %s.y, u_mipBias))%s;\n", texcoord, texcoord, bgraTexture ? ".bgra" : ""); + } } else { - WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? 
".bgra" : ""); + if (doTextureProjection) { + WRITE(p, " vec4 t = tex2Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, 0, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : ""); + } else { + WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : ""); + } } } else { // Note that here we're relying on the filter to be linear. We would have to otherwise to do two samples and manually filter in Z. diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index b7c48eb355..b459543a50 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -34,8 +34,9 @@ struct FShaderID; #define CONST_PS_FBOTEXSIZE 7 #define CONST_PS_TEXCLAMP 8 #define CONST_PS_TEXCLAMPOFF 9 +#define CONST_PS_MIPBIAS 10 // For stencil upload -#define CONST_PS_STENCILVALUE 10 +#define CONST_PS_STENCILVALUE 11 bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint64_t *uniformMask, std::string *errorString); diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 0d3e55555f..111680c72d 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -314,6 +314,11 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { }; PSSetFloatArray(CONST_PS_TEXCLAMP, texclamp, 4); PSSetFloatArray(CONST_PS_TEXCLAMPOFF, texclampoff, 2); + + float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); + mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1); + + PSSetFloatArray(CONST_PS_MIPBIAS, &mipBias, 1); } } diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index 85af315d8d..538b654473 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -98,7 +98,7 @@ void TextureCacheDX9::SetFramebufferManager(FramebufferManagerDX9 *fbManager) { } void TextureCacheDX9::ReleaseTexture(TexCacheEntry *entry, bool delete_them) { - 
LPDIRECT3DTEXTURE9 &texture = DxTex(entry); + LPDIRECT3DBASETEXTURE9 &texture = DxTex(entry); if (texture) { texture->Release(); texture = nullptr; @@ -205,7 +205,7 @@ void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase } void TextureCacheDX9::BindTexture(TexCacheEntry *entry) { - LPDIRECT3DTEXTURE9 texture = DxTex(entry); + LPDIRECT3DBASETEXTURE9 texture = DxTex(entry); if (texture != lastBoundTexture) { device_->SetTexture(0, texture); lastBoundTexture = texture; @@ -425,10 +425,20 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) { // We don't yet have mip generation, so clamp the number of levels to the ones we can load directly. int levels = std::min(plan.levelsToCreate, plan.levelsToLoad); - LPDIRECT3DTEXTURE9 &texture = DxTex(entry); + LPDIRECT3DBASETEXTURE9 &texture = DxTex(entry); D3DPOOL pool = D3DPOOL_DEFAULT; int usage = D3DUSAGE_DYNAMIC; - HRESULT hr = device_->CreateTexture(tw, th, levels, usage, dstFmt, pool, &texture, NULL); + + HRESULT hr; + if (plan.depth == 1) { + LPDIRECT3DTEXTURE9 tex; + hr = device_->CreateTexture(tw, th, levels, usage, dstFmt, pool, &tex, nullptr); + texture = tex; + } else { + LPDIRECT3DVOLUMETEXTURE9 tex; + hr = device_->CreateVolumeTexture(tw, th, plan.depth, 1, usage, dstFmt, pool, &tex, nullptr); + texture = tex; + } if (FAILED(hr)) { INFO_LOG(G3D, "Failed to create D3D texture: %dx%d", tw, th); @@ -443,24 +453,45 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) { Draw::DataFormat texFmt = FromD3D9Format(dstFmt); - // Mipmapping is only enabled when texture scaling is disabled. - for (int i = 0; i < levels; i++) { - int dstLevel = i; - HRESULT result; - uint32_t lockFlag = dstLevel == 0 ? D3DLOCK_DISCARD : 0; // Can only discard the top level - D3DLOCKED_RECT rect{}; - result = texture->LockRect(dstLevel, &rect, NULL, lockFlag); + if (plan.depth == 1) { + // Regular loop. 
+ for (int i = 0; i < levels; i++) { + int dstLevel = i; + HRESULT result; + uint32_t lockFlag = dstLevel == 0 ? D3DLOCK_DISCARD : 0; // Can only discard the top level + D3DLOCKED_RECT rect{}; + + result = ((LPDIRECT3DTEXTURE9)texture)->LockRect(dstLevel, &rect, NULL, lockFlag); + if (FAILED(result)) { + ERROR_LOG(G3D, "Failed to lock D3D 2D texture at level %d: %dx%d", i, plan.w, plan.h); + return; + } + uint8_t *data = (uint8_t *)rect.pBits; + int stride = rect.Pitch; + LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false); + ((LPDIRECT3DTEXTURE9)texture)->UnlockRect(dstLevel); + } + } else { + // 3D loop. + D3DLOCKED_BOX box{}; + HRESULT result = ((LPDIRECT3DVOLUMETEXTURE9)texture)->LockBox(0, &box, nullptr, D3DLOCK_DISCARD); if (FAILED(result)) { - ERROR_LOG(G3D, "Failed to lock D3D texture at level %d: %dx%d", i, plan.w, plan.h); + ERROR_LOG(G3D, "Failed to lock D3D 3D texture: %dx%dx%d", plan.w, plan.h, plan.depth); return; } - uint8_t *data = (uint8_t *)rect.pBits; - int stride = rect.Pitch; + uint8_t *data = (uint8_t *)box.pBits; + int stride = box.RowPitch; + for (int i = 0; i < plan.depth; i++) { + LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false); + data += box.SlicePitch; + } + ((LPDIRECT3DVOLUMETEXTURE9)texture)->UnlockBox(0); + } - LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false); - - texture->UnlockRect(dstLevel); + // Signal that we support depth textures so use it as one. 
+ if (plan.depth > 1) { + entry->status |= TexCacheEntry::STATUS_3D; } if (plan.replaced->Valid()) { diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index 194d14c347..22355e6008 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -68,8 +68,8 @@ private: void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override; void BuildTexture(TexCacheEntry *const entry) override; - LPDIRECT3DTEXTURE9 &DxTex(TexCacheEntry *entry) { - return *(LPDIRECT3DTEXTURE9 *)&entry->texturePtr; + LPDIRECT3DBASETEXTURE9 &DxTex(TexCacheEntry *entry) { + return *(LPDIRECT3DBASETEXTURE9 *)&entry->texturePtr; } LPDIRECT3DDEVICE9 device_; @@ -77,7 +77,7 @@ private: LPDIRECT3DVERTEXDECLARATION9 pFramebufferVertexDecl; - LPDIRECT3DTEXTURE9 lastBoundTexture; + LPDIRECT3DBASETEXTURE9 lastBoundTexture; float maxAnisotropyLevel; FramebufferManagerDX9 *framebufferManagerDX9_; From 566385f762cc02f5c98d33f3ac15a47020e420ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 30 Jul 2022 20:42:56 +0200 Subject: [PATCH 08/16] Remove some dead code --- GPU/Common/FragmentShaderGenerator.cpp | 35 -------------------------- 1 file changed, 35 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 6f3773c6c4..327c1ec000 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -271,41 +271,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } WRITE(p, "};\n"); } - } else if (compat.shaderLanguage == HLSL_D3D9) { - if (doTexture) - WRITE(p, "sampler tex : register(s0);\n"); - if (readFramebufferTex) { - WRITE(p, "vec2 u_fbotexSize : register(c%i);\n", CONST_PS_FBOTEXSIZE); - WRITE(p, "sampler fbotex : register(s1);\n"); - } - if (replaceBlend > REPLACE_BLEND_STANDARD) { - if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) { - 
WRITE(p, "float3 u_blendFixA : register(c%i);\n", CONST_PS_BLENDFIXA); - } - if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) { - WRITE(p, "float3 u_blendFixB : register(c%i);\n", CONST_PS_BLENDFIXB); - } - } - if (needShaderTexClamp && doTexture) { - WRITE(p, "vec4 u_texclamp : register(c%i);\n", CONST_PS_TEXCLAMP); - if (textureAtOffset) { - WRITE(p, "vec2 u_texclampoff : register(c%i);\n", CONST_PS_TEXCLAMPOFF); - } - } - - if (enableAlphaTest || enableColorTest) { - WRITE(p, "vec4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF); - WRITE(p, "vec4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK); - } - if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { - WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); - } - if (doTexture && texFunc == GE_TEXFUNC_BLEND) { - WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); - } - if (enableFog) { - WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); - } } else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { if ((shaderDepal || colorWriteMask) && gl_extensions.IsGLES) { WRITE(p, "precision highp int;\n"); From fecf9127a033f8365cec4af6e8ca75065c775c96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 30 Jul 2022 21:33:24 +0200 Subject: [PATCH 09/16] Implement 3D texturing in the OpenGL backend too. Assorted fixes. 
--- Common/GPU/D3D9/D3D9StateCache.cpp | 1 + Common/GPU/D3D9/D3D9StateCache.h | 1 + Common/GPU/OpenGL/GLQueueRunner.cpp | 34 ++++++++--- Common/GPU/OpenGL/GLQueueRunner.h | 5 +- Common/GPU/OpenGL/GLRenderManager.cpp | 3 +- Common/GPU/OpenGL/GLRenderManager.h | 12 ++-- Common/GPU/OpenGL/thin3d_gl.cpp | 4 +- GPU/Common/FragmentShaderGenerator.cpp | 14 ++++- GPU/Directx9/ShaderManagerDX9.cpp | 3 +- GPU/GLES/DepalettizeShaderGLES.cpp | 4 +- GPU/GLES/DrawEngineGLES.cpp | 12 ++-- GPU/GLES/FragmentTestCacheGLES.cpp | 4 +- GPU/GLES/ShaderManagerGLES.cpp | 8 +++ GPU/GLES/ShaderManagerGLES.h | 1 + GPU/GLES/TextureCacheGLES.cpp | 84 +++++++++++++++++--------- 15 files changed, 128 insertions(+), 62 deletions(-) diff --git a/Common/GPU/D3D9/D3D9StateCache.cpp b/Common/GPU/D3D9/D3D9StateCache.cpp index ae9e2bc2a4..3388dd2486 100644 --- a/Common/GPU/D3D9/D3D9StateCache.cpp +++ b/Common/GPU/D3D9/D3D9StateCache.cpp @@ -59,6 +59,7 @@ void DirectXState::Restore() { texMaxMipLevel.restore(); count++; texAddressU.restore(); count++; texAddressV.restore(); count++; + texAddressW.restore(); count++; } } // namespace DX9 diff --git a/Common/GPU/D3D9/D3D9StateCache.h b/Common/GPU/D3D9/D3D9StateCache.h index 5231a6cbbe..09f3286222 100644 --- a/Common/GPU/D3D9/D3D9StateCache.h +++ b/Common/GPU/D3D9/D3D9StateCache.h @@ -393,6 +393,7 @@ public: DxSampler0State1 texMaxMipLevel; DxSampler0State1 texAddressU; DxSampler0State1 texAddressV; + DxSampler0State1 texAddressW; }; #undef STATE1 diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index f4cd16737f..bc0c7274e9 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -348,7 +348,17 @@ void GLQueueRunner::RunInitSteps(const std::vector &steps, bool ski GLenum internalFormat, format, type; int alignment; Thin3DFormatToFormatAndType(step.texture_image.format, internalFormat, format, type, alignment); - glTexImage2D(tex->target, step.texture_image.level, internalFormat, 
step.texture_image.width, step.texture_image.height, 0, format, type, step.texture_image.data); + if (step.texture_image.depth == 1) { + glTexImage2D(tex->target, + step.texture_image.level, internalFormat, + step.texture_image.width, step.texture_image.height, 0, + format, type, step.texture_image.data); + } else { + glTexImage3D(tex->target, + step.texture_image.level, internalFormat, + step.texture_image.width, step.texture_image.height, step.texture_image.depth, 0, + format, type, step.texture_image.data); + } allocatedTextures = true; if (step.texture_image.allocType == GLRAllocType::ALIGNED) { FreeAlignedMemory(step.texture_image.data); @@ -364,6 +374,9 @@ void GLQueueRunner::RunInitSteps(const std::vector &steps, bool ski glTexParameteri(tex->target, GL_TEXTURE_WRAP_T, tex->wrapT); glTexParameteri(tex->target, GL_TEXTURE_MAG_FILTER, tex->magFilter); glTexParameteri(tex->target, GL_TEXTURE_MIN_FILTER, tex->minFilter); + if (step.texture_image.depth > 1) { + glTexParameteri(tex->target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + } CHECK_GL_ERROR_IF_DEBUG(); break; } @@ -375,7 +388,7 @@ void GLQueueRunner::RunInitSteps(const std::vector &steps, bool ski glBindTexture(tex->target, tex->texture); boundTexture = tex->texture; } - if (!gl_extensions.IsGLES || gl_extensions.GLES3) { + if ((!gl_extensions.IsGLES || gl_extensions.GLES3) && step.texture_finalize.loadedLevels > 1) { glTexParameteri(tex->target, GL_TEXTURE_MAX_LEVEL, step.texture_finalize.loadedLevels - 1); } tex->maxLod = (float)step.texture_finalize.loadedLevels - 1; @@ -1139,28 +1152,28 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last CHECK_GL_ERROR_IF_DEBUG(); if (tex->canWrap) { if (tex->wrapS != c.textureSampler.wrapS) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, c.textureSampler.wrapS); + glTexParameteri(tex->target, GL_TEXTURE_WRAP_S, c.textureSampler.wrapS); tex->wrapS = c.textureSampler.wrapS; } if (tex->wrapT != c.textureSampler.wrapT) { - 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, c.textureSampler.wrapT); + glTexParameteri(tex->target, GL_TEXTURE_WRAP_T, c.textureSampler.wrapT); tex->wrapT = c.textureSampler.wrapT; } } CHECK_GL_ERROR_IF_DEBUG(); if (tex->magFilter != c.textureSampler.magFilter) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, c.textureSampler.magFilter); + glTexParameteri(tex->target, GL_TEXTURE_MAG_FILTER, c.textureSampler.magFilter); tex->magFilter = c.textureSampler.magFilter; } CHECK_GL_ERROR_IF_DEBUG(); if (tex->minFilter != c.textureSampler.minFilter) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, c.textureSampler.minFilter); + glTexParameteri(tex->target, GL_TEXTURE_MIN_FILTER, c.textureSampler.minFilter); tex->minFilter = c.textureSampler.minFilter; } CHECK_GL_ERROR_IF_DEBUG(); if (tex->anisotropy != c.textureSampler.anisotropy) { if (c.textureSampler.anisotropy != 0.0f) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, c.textureSampler.anisotropy); + glTexParameterf(tex->target, GL_TEXTURE_MAX_ANISOTROPY_EXT, c.textureSampler.anisotropy); } tex->anisotropy = c.textureSampler.anisotropy; } @@ -1180,16 +1193,16 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last } #ifndef USING_GLES2 if (tex->lodBias != c.textureLod.lodBias && !gl_extensions.IsGLES) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_LOD_BIAS, c.textureLod.lodBias); + glTexParameterf(tex->target, GL_TEXTURE_LOD_BIAS, c.textureLod.lodBias); tex->lodBias = c.textureLod.lodBias; } #endif if (tex->minLod != c.textureLod.minLod) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, c.textureLod.minLod); + glTexParameterf(tex->target, GL_TEXTURE_MIN_LOD, c.textureLod.minLod); tex->minLod = c.textureLod.minLod; } if (tex->maxLod != c.textureLod.maxLod) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, c.textureLod.maxLod); + glTexParameterf(tex->target, GL_TEXTURE_MAX_LOD, c.textureLod.maxLod); tex->maxLod = c.textureLod.maxLod; } break; @@ 
-1200,6 +1213,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last // TODO: Need bind? if (!c.texture_subimage.data) Crash(); + _assert_(tex->target == GL_TEXTURE_2D); // For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage. GLuint internalFormat, format, type; int alignment; diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h index 715bc3382a..c6173195dd 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.h +++ b/Common/GPU/OpenGL/GLQueueRunner.h @@ -259,8 +259,9 @@ struct GLRInitStep { GLRTexture *texture; Draw::DataFormat format; int level; - int width; - int height; + uint16_t width; + uint16_t height; + uint16_t depth; GLRAllocType allocType; bool linearFilter; uint8_t *data; // owned, delete[]-d diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index 1475d8d5ff..45a6dcef9a 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -21,7 +21,7 @@ static bool OnRenderThread() { } #endif -GLRTexture::GLRTexture(int width, int height, int numMips) { +GLRTexture::GLRTexture(int width, int height, int depth, int numMips) { if (gl_extensions.OES_texture_npot) { canWrap = true; } else { @@ -29,6 +29,7 @@ GLRTexture::GLRTexture(int width, int height, int numMips) { } w = width; h = height; + d = depth; this->numMips = numMips; } diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index bb5f49a92a..4af5629dca 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -25,12 +25,13 @@ constexpr int MAX_GL_TEXTURE_SLOTS = 8; class GLRTexture { public: - GLRTexture(int width, int height, int numMips); + GLRTexture(int width, int height, int depth, int numMips); ~GLRTexture(); GLuint texture = 0; uint16_t w; uint16_t h; + uint16_t d; // We don't trust OpenGL defaults - setting wildly off values ensures that we'll end up 
overwriting these parameters. GLenum target = 0xFFFF; @@ -49,7 +50,7 @@ public: class GLRFramebuffer { public: GLRFramebuffer(int _width, int _height, bool z_stencil) - : color_texture(_width, _height, 1), z_stencil_texture(_width, _height, 1), + : color_texture(_width, _height, 1, 1), z_stencil_texture(_width, _height, 1, 1), width(_width), height(_height), z_stencil_(z_stencil) { } @@ -384,9 +385,9 @@ public: // Creation commands. These were not needed in Vulkan since there we can do that on the main thread. // We pass in width/height here even though it's not strictly needed until we support glTextureStorage // and then we'll also need formats and stuff. - GLRTexture *CreateTexture(GLenum target, int width, int height, int numMips) { + GLRTexture *CreateTexture(GLenum target, int width, int height, int depth, int numMips) { GLRInitStep step{ GLRInitStepType::CREATE_TEXTURE }; - step.create_texture.texture = new GLRTexture(width, height, numMips); + step.create_texture.texture = new GLRTexture(width, height, depth, numMips); step.create_texture.texture->target = target; initSteps_.push_back(step); return step.create_texture.texture; @@ -537,7 +538,7 @@ public: } // Takes ownership over the data pointer and delete[]-s it. 
- void TextureImage(GLRTexture *texture, int level, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) { + void TextureImage(GLRTexture *texture, int level, int width, int height, int depth, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) { GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE }; step.texture_image.texture = texture; step.texture_image.data = data; @@ -545,6 +546,7 @@ public: step.texture_image.level = level; step.texture_image.width = width; step.texture_image.height = height; + step.texture_image.depth = depth; step.texture_image.allocType = allocType; step.texture_image.linearFilter = linearFilter; initSteps_.push_back(step); diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 5fa71a3445..69d06bd81b 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -792,7 +792,7 @@ OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) : format_ = desc.format; type_ = desc.type; GLenum target = TypeToTarget(desc.type); - tex_ = render->CreateTexture(target, desc.width, desc.height, desc.mipLevels); + tex_ = render->CreateTexture(target, desc.width, desc.height, 1, desc.mipLevels); mipLevels_ = desc.mipLevels; if (desc.initData.empty()) @@ -877,7 +877,7 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int } } - render_->TextureImage(tex_, level, width, height, format_, texData); + render_->TextureImage(tex_, level, width, height, depth, format_, texData); } #ifdef DEBUG_READ_PIXELS diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 327c1ec000..93e244a34f 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -276,8 +276,13 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu 
WRITE(p, "precision highp int;\n"); } - if (doTexture) - WRITE(p, "uniform sampler2D tex;\n"); + if (doTexture) { + if (texture3D) { + WRITE(p, "uniform sampler3D tex;\n"); + } else { + WRITE(p, "uniform sampler2D tex;\n"); + } + } if (readFramebufferTex) { if (!compat.texelFetch) { @@ -337,6 +342,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "uniform vec3 u_texenv;\n"); } + if (texture3D) { + *uniformMask |= DIRTY_TEXCLAMP; + WRITE(p, "uniform float u_mipBias;\n"); + } + WRITE(p, "%s %s lowp vec4 v_color0;\n", shading, compat.varying_fs); if (lmode) WRITE(p, "%s %s lowp vec3 v_color1;\n", shading, compat.varying_fs); diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 111680c72d..cce2e7c119 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -316,8 +316,9 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { PSSetFloatArray(CONST_PS_TEXCLAMPOFF, texclampoff, 2); float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); - mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1); + // NOTE: This equation needs some adjustment in D3D9. Can't get it to look completely smooth :( + mipBias = (mipBias + 0.25f) / (float)(gstate.getTextureMaxLevel() + 1); PSSetFloatArray(CONST_PS_MIPBIAS, &mipBias, 1); } } diff --git a/GPU/GLES/DepalettizeShaderGLES.cpp b/GPU/GLES/DepalettizeShaderGLES.cpp index fbf544f1ca..b49034718a 100644 --- a/GPU/GLES/DepalettizeShaderGLES.cpp +++ b/GPU/GLES/DepalettizeShaderGLES.cpp @@ -98,11 +98,11 @@ GLRTexture *DepalShaderCacheGLES::GetClutTexture(GEPaletteFormat clutFormat, con int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 
256 : 512; DepalTexture *tex = new DepalTexture(); - tex->texture = render_->CreateTexture(GL_TEXTURE_2D, texturePixels, 1, 1); + tex->texture = render_->CreateTexture(GL_TEXTURE_2D, texturePixels, 1, 1, 1); uint8_t *clutCopy = new uint8_t[1024]; memcpy(clutCopy, rawClut, 1024); - render_->TextureImage(tex->texture, 0, texturePixels, 1, dstFmt, clutCopy, GLRAllocType::NEW, false); + render_->TextureImage(tex->texture, 0, texturePixels, 1, 1, dstFmt, clutCopy, GLRAllocType::NEW, false); tex->lastFrame = gpuStats.numFlips; texCache_[clutId] = tex; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index f8f7a2cbbc..31ade7552d 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -502,8 +502,8 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p prevSizeU = size_u; prevSizeV = size_v; if (!data_tex[0]) - data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D, size_u * 3, size_v, 1); - renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); + data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D, size_u * 3, size_v, 1, 1); + renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); renderManager_->FinalizeTexture(data_tex[0], 0, false); } renderManager_->BindTexture(TEX_SLOT_SPLINE_POINTS, data_tex[0]); @@ -520,8 +520,8 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p if (prevSizeWU < weights.size_u) { prevSizeWU = weights.size_u; if (!data_tex[1]) - data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_u * 2, 1, 1); - renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); + data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_u * 2, 1, 1, 1); + 
renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); renderManager_->FinalizeTexture(data_tex[1], 0, false); } renderManager_->BindTexture(TEX_SLOT_SPLINE_WEIGHTS_U, data_tex[1]); @@ -531,8 +531,8 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p if (prevSizeWV < weights.size_v) { prevSizeWV = weights.size_v; if (!data_tex[2]) - data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_v * 2, 1, 1); - renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); + data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_v * 2, 1, 1, 1); + renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); renderManager_->FinalizeTexture(data_tex[2], 0, false); } renderManager_->BindTexture(TEX_SLOT_SPLINE_WEIGHTS_V, data_tex[2]); diff --git a/GPU/GLES/FragmentTestCacheGLES.cpp b/GPU/GLES/FragmentTestCacheGLES.cpp index bd977cf103..1206911f4d 100644 --- a/GPU/GLES/FragmentTestCacheGLES.cpp +++ b/GPU/GLES/FragmentTestCacheGLES.cpp @@ -144,8 +144,8 @@ GLRTexture *FragmentTestCacheGLES::CreateTestTexture(const GEComparison funcs[4] } } - GLRTexture *tex = render_->CreateTexture(GL_TEXTURE_2D, 256, 1, 1); - render_->TextureImage(tex, 0, 256, 1, Draw::DataFormat::R8G8B8A8_UNORM, data); + GLRTexture *tex = render_->CreateTexture(GL_TEXTURE_2D, 256, 1, 1, 1); + render_->TextureImage(tex, 0, 256, 1, 1, Draw::DataFormat::R8G8B8A8_UNORM, data); return tex; } diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index be2467778f..edc77dccd1 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -174,6 +174,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ 
&u_tess_weights_v, "u_tess_weights_v" }); queries.push_back({ &u_spline_counts, "u_spline_counts" }); queries.push_back({ &u_depal_mask_shift_off_fmt, "u_depal_mask_shift_off_fmt" }); + queries.push_back({ &u_mipBias, "u_mipBias" }); attrMask = vs->GetAttrMask(); availableUniforms = vs->GetUniformMask() | fs->GetUniformMask(); @@ -458,6 +459,13 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu } } + if ((dirty & DIRTY_TEXCLAMP) && u_mipBias != -1) { + float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); + mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1); + + render_->SetUniformF(&u_mipBias, 1, &mipBias); + } + // Transform if (dirty & DIRTY_WORLDMATRIX) { SetMatrix4x3(render_, &u_world, gstate.worldMatrix); diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 89b335b094..bcf5130805 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -61,6 +61,7 @@ public: int u_cullRangeMin; int u_cullRangeMax; int u_rotation; + int u_mipBias; #ifdef USE_BONE_ARRAY int u_bone; // array, size is numBones diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index de4dc36994..c6a0364412 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -451,7 +451,11 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { dstFmt = Draw::DataFormat::R8G8B8A8_UNORM; } - entry->textureName = render_->CreateTexture(GL_TEXTURE_2D, tw, tw, plan.levelsToCreate); + if (plan.depth == 1) { + entry->textureName = render_->CreateTexture(GL_TEXTURE_2D, tw, tw, 1, plan.levelsToCreate); + } else { + entry->textureName = render_->CreateTexture(GL_TEXTURE_3D, tw, tw, plan.depth, 1); + } // Apply some additional compatibility checks. 
if (plan.levelsToLoad > 1) { @@ -468,46 +472,68 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { plan.levelsToCreate = plan.levelsToLoad; } - for (int i = 0; i < plan.levelsToLoad; i++) { - int srcLevel = i == 0 ? plan.baseLevelSrc : i; + if (plan.depth == 1) { + for (int i = 0; i < plan.levelsToLoad; i++) { + int srcLevel = i == 0 ? plan.baseLevelSrc : i; - int w = gstate.getTextureWidth(srcLevel); - int h = gstate.getTextureHeight(srcLevel); + int w = gstate.getTextureWidth(srcLevel); + int h = gstate.getTextureHeight(srcLevel); - u8 *data = nullptr; - int stride = 0; + u8 *data = nullptr; + int stride = 0; - if (plan.replaced->GetSize(srcLevel, w, h)) { - int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format(srcLevel)); - stride = w * bpp; - data = (u8 *)AllocateAlignedMemory(stride * h, 16); - } else { - if (plan.scaleFactor > 1) { - data = (u8 *)AllocateAlignedMemory(4 * (w * plan.scaleFactor) * (h * plan.scaleFactor), 16); - stride = w * plan.scaleFactor * 4; - } else { - int bpp = dstFmt == Draw::DataFormat::R8G8B8A8_UNORM ? 4 : 2; - - stride = std::max(w * bpp, 4); + if (plan.replaced->GetSize(srcLevel, w, h)) { + int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format(srcLevel)); + stride = w * bpp; data = (u8 *)AllocateAlignedMemory(stride * h, 16); + } else { + if (plan.scaleFactor > 1) { + data = (u8 *)AllocateAlignedMemory(4 * (w * plan.scaleFactor) * (h * plan.scaleFactor), 16); + stride = w * plan.scaleFactor * 4; + } else { + int bpp = dstFmt == Draw::DataFormat::R8G8B8A8_UNORM ? 4 : 2; + + stride = std::max(w * bpp, 4); + data = (u8 *)AllocateAlignedMemory(stride * h, 16); + } } + + if (!data) { + ERROR_LOG(G3D, "Ran out of RAM trying to allocate a temporary texture upload buffer (%dx%d)", w, h); + return; + } + + LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, dstFmt, true); + + // NOTE: TextureImage takes ownership of data, so we don't free it afterwards. 
+ render_->TextureImage(entry->textureName, i, w * plan.scaleFactor, h * plan.scaleFactor, 1, dstFmt, data, GLRAllocType::ALIGNED); } - if (!data) { - ERROR_LOG(G3D, "Ran out of RAM trying to allocate a temporary texture upload buffer (%dx%d)", w, h); - return; + bool genMips = plan.levelsToCreate > plan.levelsToLoad; + + render_->FinalizeTexture(entry->textureName, plan.levelsToLoad, genMips); + } else { + int bpp = dstFmt == Draw::DataFormat::R8G8B8A8_UNORM ? 4 : 2; + int stride = bpp * (plan.w * plan.scaleFactor); + int levelStride = stride * (plan.h * plan.scaleFactor); + + u8 *data = (u8 *)AllocateAlignedMemory(levelStride * plan.depth, 16); + memset(data, 0, levelStride * plan.depth); + u8 *p = data; + + for (int i = 0; i < plan.depth; i++) { + LoadTextureLevel(*entry, p, stride, *plan.replaced, i, plan.scaleFactor, dstFmt, true); + p += levelStride; } - LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, dstFmt, true); + render_->TextureImage(entry->textureName, 0, plan.w * plan.scaleFactor, plan.h * plan.scaleFactor, plan.depth, dstFmt, data, GLRAllocType::ALIGNED); - // NOTE: TextureImage takes ownership of data, so we don't free it afterwards. - render_->TextureImage(entry->textureName, i, w * plan.scaleFactor, h * plan.scaleFactor, dstFmt, data, GLRAllocType::ALIGNED); + // Signal that we support depth textures so use it as one. 
+ entry->status |= TexCacheEntry::STATUS_3D; + + render_->FinalizeTexture(entry->textureName, 1, false); } - bool genMips = plan.levelsToCreate > plan.levelsToLoad; - - render_->FinalizeTexture(entry->textureName, plan.levelsToLoad, genMips); - if (plan.replaced->Valid()) { entry->SetAlphaStatus(TexCacheEntry::TexStatus(plan.replaced->AlphaStatus())); } From 129f3fe9974cc94cb09568b52b4640fc41c80d07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 30 Jul 2022 21:52:58 +0200 Subject: [PATCH 10/16] Testfix, cleanup --- Common/GPU/OpenGL/thin3d_gl.cpp | 3 +++ Common/GPU/Shader.cpp | 4 ++++ Common/GPU/Shader.h | 1 + GPU/Common/FragmentShaderGenerator.cpp | 10 +++------- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 69d06bd81b..740d62862e 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -636,6 +636,7 @@ OpenGLContext::OpenGLContext() { shaderLanguageDesc_.shaderLanguage = ShaderLanguage::GLSL_3xx; shaderLanguageDesc_.fragColor0 = "fragColor0"; shaderLanguageDesc_.texture = "texture"; + shaderLanguageDesc_.texture3D = "texture"; shaderLanguageDesc_.glslES30 = true; shaderLanguageDesc_.bitwiseOps = true; shaderLanguageDesc_.texelFetch = "texelFetch"; @@ -659,6 +660,7 @@ OpenGLContext::OpenGLContext() { shaderLanguageDesc_.shaderLanguage = ShaderLanguage::GLSL_3xx; shaderLanguageDesc_.fragColor0 = "fragColor0"; shaderLanguageDesc_.texture = "texture"; + shaderLanguageDesc_.texture3D = "texture"; shaderLanguageDesc_.glslES30 = true; shaderLanguageDesc_.bitwiseOps = true; shaderLanguageDesc_.texelFetch = "texelFetch"; @@ -669,6 +671,7 @@ OpenGLContext::OpenGLContext() { shaderLanguageDesc_.shaderLanguage = ShaderLanguage::GLSL_1xx; shaderLanguageDesc_.fragColor0 = "fragColor0"; shaderLanguageDesc_.texture = "texture"; + shaderLanguageDesc_.texture3D = "texture"; shaderLanguageDesc_.bitwiseOps = true; 
shaderLanguageDesc_.texelFetch = "texelFetch"; shaderLanguageDesc_.varying_vs = "out"; diff --git a/Common/GPU/Shader.cpp b/Common/GPU/Shader.cpp index 14b703f544..09f2314997 100644 --- a/Common/GPU/Shader.cpp +++ b/Common/GPU/Shader.cpp @@ -44,6 +44,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { fragColor0 = "gl_FragColor"; fragColor1 = "fragColor1"; texture = "texture2D"; + texture3D = "texture3D"; texelFetch = nullptr; bitwiseOps = false; lastFragData = nullptr; @@ -59,6 +60,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { fragColor0 = "fragColor0"; fragColor1 = "fragColor1"; texture = "texture"; + texture3D = "texture"; texelFetch = "texelFetch"; bitwiseOps = true; lastFragData = nullptr; @@ -80,6 +82,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { glslVersionNumber = 450; lastFragData = nullptr; texture = "texture"; + texture3D = "texture"; texelFetch = "texelFetch"; forceMatrix4x4 = false; coefsFromBuffers = true; @@ -102,6 +105,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { glslVersionNumber = 0; lastFragData = nullptr; texture = "texture"; + texture3D = "texture"; texelFetch = "texelFetch"; forceMatrix4x4 = false; coefsFromBuffers = true; diff --git a/Common/GPU/Shader.h b/Common/GPU/Shader.h index e2a754f011..71cd26cae6 100644 --- a/Common/GPU/Shader.h +++ b/Common/GPU/Shader.h @@ -46,6 +46,7 @@ struct ShaderLanguageDesc { const char *fragColor0 = nullptr; const char *fragColor1 = nullptr; const char *texture = nullptr; + const char *texture3D = nullptr; const char *texelFetch = nullptr; const char *lastFragData = nullptr; const char *framebufferFetchExtension = nullptr; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 93e244a34f..7857fc99fd 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -277,11 +277,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } if (doTexture) { - if 
(texture3D) { - WRITE(p, "uniform sampler3D tex;\n"); - } else { - WRITE(p, "uniform sampler2D tex;\n"); - } + WRITE(p, "uniform %s tex;\n", texture3D ? "sampler3D" : "sampler2D"); } if (readFramebufferTex) { @@ -561,9 +557,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu // Let's add that if we run into a case... if (texture3D) { if (doTextureProjection) { - WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, u_mipBias, %s.z));\n", compat.texture, texcoord, texcoord); + WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, u_mipBias, %s.z));\n", compat.texture3D, texcoord, texcoord); } else { - WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, u_mipBias));\n", compat.texture, texcoord); + WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, u_mipBias));\n", compat.texture3D, texcoord); } } else { if (doTextureProjection) { From 46d6b43618425fe3e8e5b76fad6a72ef91fc0db1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 31 Jul 2022 00:18:28 +0200 Subject: [PATCH 11/16] Support 3D textures in OpenGL ES, add feature check --- Common/GPU/D3D11/thin3d_d3d11.cpp | 1 + Common/GPU/D3D9/thin3d_d3d9.cpp | 2 ++ Common/GPU/OpenGL/GLFeatures.cpp | 1 + Common/GPU/OpenGL/GLFeatures.h | 1 + Common/GPU/OpenGL/thin3d_gl.cpp | 2 ++ Common/GPU/Vulkan/thin3d_vulkan.cpp | 1 + Common/GPU/thin3d.h | 2 ++ GPU/Common/FragmentShaderGenerator.cpp | 13 +++++++++++-- GPU/Common/TextureCacheCommon.cpp | 2 +- 9 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index a913bfde2a..9d13d0789d 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -283,6 +283,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de caps_.framebufferCopySupported = true; caps_.framebufferDepthBlitSupported = false; caps_.framebufferDepthCopySupported = true; + caps_.texture3DSupported = true; D3D11_FEATURE_DATA_D3D11_OPTIONS options{}; HRESULT result = 
device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options)); diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index 1913523b86..9afba75110 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -667,6 +667,8 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID caps_.framebufferCopySupported = false; caps_.framebufferDepthBlitSupported = true; caps_.framebufferDepthCopySupported = false; + caps_.texture3DSupported = true; + if (d3d) { D3DDISPLAYMODE displayMode; d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &displayMode); diff --git a/Common/GPU/OpenGL/GLFeatures.cpp b/Common/GPU/OpenGL/GLFeatures.cpp index 9808dbded1..f0a43249a6 100644 --- a/Common/GPU/OpenGL/GLFeatures.cpp +++ b/Common/GPU/OpenGL/GLFeatures.cpp @@ -383,6 +383,7 @@ void CheckGLExtensions() { gl_extensions.EXT_shader_framebuffer_fetch = g_set_gl_extensions.count("GL_EXT_shader_framebuffer_fetch") != 0; gl_extensions.ARM_shader_framebuffer_fetch = g_set_gl_extensions.count("GL_ARM_shader_framebuffer_fetch") != 0; gl_extensions.OES_texture_float = g_set_gl_extensions.count("GL_OES_texture_float") != 0; + gl_extensions.OES_texture_3D = g_set_gl_extensions.count("GL_OES_texture_3D") != 0; gl_extensions.EXT_buffer_storage = g_set_gl_extensions.count("GL_EXT_buffer_storage") != 0; gl_extensions.EXT_clip_cull_distance = g_set_gl_extensions.count("GL_EXT_clip_cull_distance") != 0; gl_extensions.APPLE_clip_distance = g_set_gl_extensions.count("GL_APPLE_clip_distance") != 0; diff --git a/Common/GPU/OpenGL/GLFeatures.h b/Common/GPU/OpenGL/GLFeatures.h index 8efd782a14..9a87aa3cfa 100644 --- a/Common/GPU/OpenGL/GLFeatures.h +++ b/Common/GPU/OpenGL/GLFeatures.h @@ -51,6 +51,7 @@ struct GLExtensions { bool OES_vertex_array_object; bool OES_copy_image; bool OES_texture_float; + bool OES_texture_3D; // ARB bool ARB_framebuffer_object; diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp 
b/Common/GPU/OpenGL/thin3d_gl.cpp index 740d62862e..e61cb93aae 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -536,8 +536,10 @@ OpenGLContext::OpenGLContext() { } else { caps_.preferredDepthBufferFormat = DataFormat::D16; } + caps_.texture3DSupported = gl_extensions.OES_texture_3D; } else { caps_.preferredDepthBufferFormat = DataFormat::D24_S8; + caps_.texture3DSupported = true; } caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object; caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 1529f4626b..06bca64a33 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -787,6 +787,7 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) caps_.framebufferDepthBlitSupported = false; // Can be checked for. caps_.framebufferDepthCopySupported = true; // Will pretty much always be the case. caps_.preferredDepthBufferFormat = DataFormat::D24_S8; // TODO: Ask vulkan. + caps_.texture3DSupported = true; auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties; switch (deviceProps.vendorID) { diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 6c547ed6a9..55093b83d0 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -532,6 +532,8 @@ struct DeviceCaps { bool framebufferDepthCopySupported; bool framebufferDepthBlitSupported; bool framebufferFetchSupported; + bool texture3DSupported; + std::string deviceName; // The device name to use when creating the thin3d context, to get the same one. 
}; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 7857fc99fd..8b2c1a944a 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -49,6 +49,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu highpTexcoord = highpFog; } + bool texture3D = id.Bit(FS_BIT_3D_TEXTURE); + ReplaceAlphaType stencilToAlpha = static_cast(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)); std::vector gl_exts; @@ -62,6 +64,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (compat.framebufferFetchExtension) { gl_exts.push_back(compat.framebufferFetchExtension); } + if (gl_extensions.OES_texture_3D && texture3D) { + gl_exts.push_back("#extension GL_OES_texture_3D: enable"); + } } ShaderWriter p(buffer, compat, ShaderStage::Fragment, gl_exts.data(), gl_exts.size()); @@ -78,7 +83,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); - bool texture3D = id.Bit(FS_BIT_3D_TEXTURE); bool flatBug = bugs.Has(Draw::Bugs::BROKEN_FLAT_IN_SHADER) && g_Config.bVendorBugChecksEnabled; @@ -277,7 +281,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } if (doTexture) { - WRITE(p, "uniform %s tex;\n", texture3D ? "sampler3D" : "sampler2D"); + if (texture3D) { + // For whatever reason, a precision specifier is required here. 
+ WRITE(p, "uniform lowp sampler3D tex;\n"); + } else { + WRITE(p, "uniform sampler2D tex;\n"); + } } if (readFramebufferTex) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 1c09fd6065..9e2968727e 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -2097,7 +2097,7 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt } } - if (pure3D) { + if (pure3D && draw_->GetDeviceCaps().texture3DSupported) { plan.depth = plan.levelsToLoad; plan.scaleFactor = 1; } From cc857e8217cb70fdca165dc283e730f7b4b020ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 31 Jul 2022 00:21:54 +0200 Subject: [PATCH 12/16] Revert mistaken change in D3D9 fragment shader gen --- GPU/Common/FragmentShaderGenerator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 8b2c1a944a..6fece2c6cd 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -556,7 +556,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } else { if (doTextureProjection) { - WRITE(p, " vec4 t = tex2Dproj(tex, vec3(v_texcoord.x, v_texcoord.y, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : ""); + WRITE(p, " vec4 t = tex2Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, 0.0, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : ""); } else { WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? 
".bgra" : ""); } From 0caaa74be9b1dbc8b6a19a0cba8fb15186dda4c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 31 Jul 2022 00:37:21 +0200 Subject: [PATCH 13/16] Possible flicker fix for Macross --- GPU/GPUCommon.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index eb5bf312e4..9d307acbb7 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1560,16 +1560,18 @@ void GPUCommon::Execute_TexLevel(u32 op, u32 diff) { // TODO: If you change the rules here, don't forget to update the inner interpreter in Execute_Prim. if (diff == 0xFFFFFFFF) return; - gstate.texlevel ^= diff; - if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & gstate.texlevel) != 0) { - Flush(); - } + gstate.texlevel ^= diff; if (diff & 0xFF0000) { // Piggyback on this flag for 3D textures. gstate_c.Dirty(DIRTY_TEXCLAMP); } + if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & gstate.texlevel) != 0) { + Flush(); + } + + gstate.texlevel ^= diff; gstate_c.Dirty(DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE); } From c158414858ca5b5f673ee3ccbdc5b3690a55bf86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 31 Jul 2022 10:43:12 +0200 Subject: [PATCH 14/16] Give the mip bias its own uniform flag. 
--- GPU/Common/FragmentShaderGenerator.cpp | 2 +- GPU/Common/ShaderCommon.h | 4 +++- GPU/Common/ShaderUniforms.cpp | 2 ++ GPU/Directx9/ShaderManagerDX9.cpp | 4 +++- GPU/GLES/ShaderManagerGLES.cpp | 2 +- GPU/GPUCommon.cpp | 2 +- GPU/GPUState.h | 2 +- 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 6fece2c6cd..d5286af5ea 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -348,7 +348,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } if (texture3D) { - *uniformMask |= DIRTY_TEXCLAMP; + *uniformMask |= DIRTY_MIPBIAS; WRITE(p, "uniform float u_mipBias;\n"); } diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index e83e5ea388..d15d666e0f 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -87,11 +87,13 @@ enum : uint64_t { DIRTY_DEPAL = 1ULL << 35, DIRTY_COLORWRITEMASK = 1ULL << 36, + DIRTY_MIPBIAS = 1ULL << 37, + // space for 4 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0x3FFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! 
diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 77308b91ee..0a8888321b 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -104,7 +104,9 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->texClamp[3] = invH * 0.5f; ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH; + } + if (dirtyUniforms & DIRTY_MIPBIAS) { float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); ub->mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1); } diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index cce2e7c119..38cda2d8b1 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -263,7 +263,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) { in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f)); } -const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP; +const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS; void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { @@ -314,7 +314,9 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { }; PSSetFloatArray(CONST_PS_TEXCLAMP, texclamp, 4); PSSetFloatArray(CONST_PS_TEXCLAMPOFF, texclampoff, 2); + } + if (dirtyUniforms & DIRTY_MIPBIAS) { float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); // NOTE: This equation needs some adjustment in D3D9. 
Can't get it to look completely smooth :( diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index edc77dccd1..858052f43e 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -459,7 +459,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu } } - if ((dirty & DIRTY_TEXCLAMP) && u_mipBias != -1) { + if ((dirty & DIRTY_MIPBIAS) && u_mipBias != -1) { float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f); mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 9d307acbb7..9fc627fc75 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1565,7 +1565,7 @@ void GPUCommon::Execute_TexLevel(u32 op, u32 diff) { if (diff & 0xFF0000) { // Piggyback on this flag for 3D textures. - gstate_c.Dirty(DIRTY_TEXCLAMP); + gstate_c.Dirty(DIRTY_MIPBIAS); } if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & gstate.texlevel) != 0) { Flush(); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 8675afd109..605b9a9a66 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -554,7 +554,7 @@ struct GPUStateCache { void SetTextureIs3D(bool is3D) { if (is3D != curTextureIs3D) { curTextureIs3D = is3D; - Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_UVSCALEOFFSET); + Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? 
DIRTY_MIPBIAS : 0)); } } From 99b4e7c54c4de1eda7f0a8ab392e39bbaf797920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 1 Aug 2022 00:16:57 +0200 Subject: [PATCH 15/16] Vulkan: Don't try to auto-mipmap 3D textures in max quality filter mode --- GPU/Vulkan/TextureCacheVulkan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 6b31a5a9e0..a4f5c4709b 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -589,7 +589,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { } int maxPossibleMipLevels; - if (plan.isVideo) { + if (plan.isVideo || plan.depth != 1) { maxPossibleMipLevels = 1; } else { maxPossibleMipLevels = log2i(std::min(plan.w * plan.scaleFactor, plan.h * plan.scaleFactor)) + 1; From 33316543969f0232d0e12279170b7457b8fd4e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 1 Aug 2022 00:19:47 +0200 Subject: [PATCH 16/16] Fix Macross glitches. 
--- GPU/Common/TextureCacheCommon.cpp | 8 ++++++-- GPU/D3D11/DrawEngineD3D11.cpp | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 9e2968727e..f3d685a8c4 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -538,7 +538,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { const AttachCandidate &candidate = candidates[index]; nextTexture_ = nullptr; nextNeedsRebuild_ = false; - SetTextureFramebuffer(candidate); + SetTextureFramebuffer(candidate); // sets curTexture3D return nullptr; } } @@ -1073,6 +1073,8 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) nextTexture_ = nullptr; } + gstate_c.SetTextureIs3D(false); + nextNeedsRehash_ = false; nextNeedsChange_ = false; nextNeedsRebuild_ = false; @@ -1771,7 +1773,9 @@ void TextureCacheCommon::ApplyTexture() { ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR); nextFramebufferTexture_ = nullptr; } - gstate_c.SetTextureIs3D(false); + + // We don't set the 3D texture state here or anything else, on some backends (?) + // a nextTexture_ of nullptr means keep the current texture. return; } diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 8715116b30..e801f49d6d 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -335,6 +335,8 @@ void DrawEngineD3D11::DoFlush() { // until critical state changes. That's when we draw (flush). GEPrimitiveType prim = prevPrim_; + + // SetTexture is called in here, along with setting a lot of other state. ApplyDrawState(prim); // Always use software for flat shading to fix the provoking index. @@ -508,7 +510,7 @@ rotateVBO: prim = indexGen.Prim(); } - VERBOSE_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount); + VERBOSE_LOG(G3D, "Flush prim %d! 
%d verts in one go", prim, vertexCount); bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);