diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index 7a5e12d349..f17c1fd6a1 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -98,7 +98,6 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth); - CheckSetting(iniFile, gameID, "SmoothedDepal", &flags_.SmoothedDepal); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 5d52e3aec9..29345aee42 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -88,7 +88,6 @@ struct CompatFlags { bool AllowLargeFBTextureOffsets; bool AtracLoopHack; bool DeswizzleDepth; - bool SmoothedDepal; }; class IniFile; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index e7fd8b5fed..a38e70fd5c 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -90,7 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. 
- bool smoothedDepal = PSP_CoreParameter().compat.flags().SmoothedDepal; + bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL); bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 510e90e929..849a1b8b19 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -261,6 +261,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; bool useShaderDepal = gstate_c.useShaderDepal; + bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal; bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead); // Note how we here recompute some of the work already done in state mapping. @@ -290,6 +291,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { } id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture); id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal); + id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal); id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D); } diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 8dcee32c1e..a105af6746 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -94,6 +94,7 @@ enum FShaderBit : uint8_t { FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, FS_BIT_COLOR_WRITEMASK = 50, FS_BIT_3D_TEXTURE = 51, + FS_BIT_SHADER_SMOOTHED_DEPAL = 52, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index de6c840722..6d14e0cd4e 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1886,8 +1886,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); // Very icky conflation here of 
native and thin3d rendering. This will need careful work per backend in BindAsClutTexture. - Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); - BindAsClutTexture(clutTexture); + ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + BindAsClutTexture(clutTexture.texture); framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); // Vulkan needs to do some extra work here to pick out the native handle from Draw. @@ -1901,7 +1901,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Since we started/ended render passes, might need these. gstate_c.Dirty(DIRTY_DEPAL); - gstate_c.SetUseShaderDepal(true); + gstate_c.SetUseShaderDepal(true, gstate.getClutIndexStartPos() == 0 && gstate.getClutIndexMask() <= clutTexture.rampLength); gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; @@ -1914,12 +1914,12 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer } textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? 
GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } if (textureShader) { const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight); draw_->BindTexture(0, nullptr); draw_->BindTexture(1, nullptr); @@ -1930,7 +1930,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->SetViewports(1, &vp); draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); - draw_->BindTexture(1, clutTexture); + draw_->BindTexture(1, clutTexture.texture); Draw::SamplerState *nearest = textureShaderCache_->GetSampler(); draw_->BindSamplerStates(0, 1, &nearest); draw_->BindSamplerStates(1, 1, &nearest); @@ -1958,7 +1958,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); BoundFramebufferTexture(); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); } diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index defbeb1575..1f684af5da 100644 --- a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -51,22 +51,22 @@ void TextureShaderCache::DeviceLost() { Clear(); } -Draw::Texture *TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) { +ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 
*rawClut) { // Simplistic, but works well enough. u32 clutId = clutHash ^ (uint32_t)clutFormat; auto oldtex = texCache_.find(clutId); if (oldtex != texCache_.end()) { oldtex->second->lastFrame = gpuStats.numFlips; - return oldtex->second->texture; + return *oldtex->second; } - int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; + int maxClutEntries = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; ClutTexture *tex = new ClutTexture(); Draw::TextureDesc desc{}; - desc.width = texturePixels; + desc.width = maxClutEntries; desc.height = 1; desc.depth = 1; desc.mipLevels = 1; @@ -81,24 +81,49 @@ Draw::Texture *TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, co desc.initData.push_back((const uint8_t *)rawClut); break; case GEPaletteFormat::GE_CMODE_16BIT_BGR5650: - ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551: - ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444: - ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; } + int lastR = 0; + int lastG = 0; + int lastB = 0; + int lastA = 0; + + int rampLength = 0; + // Quick check for how many continuously growing entries we have at the start. + // Bilinearly filtering CLUTs only really makes sense for this kind of ramp. 
+ for (int i = 0; i < maxClutEntries; i++) { + rampLength = i + 1; + int r = desc.initData[0][i * 4]; + int g = desc.initData[0][i * 4 + 1]; + int b = desc.initData[0][i * 4 + 2]; + int a = desc.initData[0][i * 4 + 3]; + if (r < lastR || g < lastG || b < lastB || a < lastA) { + break; + } else { + lastR = r; + lastG = g; + lastB = b; + lastA = a; + } + } + tex->texture = draw_->CreateTexture(desc); tex->lastFrame = gpuStats.numFlips; + tex->rampLength = rampLength; texCache_[clutId] = tex; - return tex->texture; + return *tex; } void TextureShaderCache::Clear() { diff --git a/GPU/Common/TextureShaderCommon.h b/GPU/Common/TextureShaderCommon.h index e2967ea89c..f5ff0af8dc 100644 --- a/GPU/Common/TextureShaderCommon.h +++ b/GPU/Common/TextureShaderCommon.h @@ -39,6 +39,7 @@ class ClutTexture { public: Draw::Texture *texture; int lastFrame; + int rampLength; }; // For CLUT depal shaders, and other pre-bind texture shaders. @@ -49,7 +50,7 @@ public: ~TextureShaderCache(); TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); - Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); + ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); Draw::SamplerState *GetSampler(); diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 6fb19338d9..3af9488560 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -225,7 +225,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) { int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 
0 : entry->maxLevel; SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry); ApplySamplingParams(samplerKey); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } void TextureCacheGLES::Unbind() { diff --git a/GPU/GPUState.h b/GPU/GPUState.h index ae1c2c6510..f75a09fdad 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -529,9 +529,11 @@ struct GPUStateCache { bool IsDirty(u64 what) const { return (dirty & what) != 0ULL; } - void SetUseShaderDepal(bool depal) { - if (depal != useShaderDepal) { + void SetUseShaderDepal(bool depal, bool smoothed) { + // The smoothed flag feeds FS_BIT_SHADER_SMOOTHED_DEPAL, so a change to it alone must also update the cache and dirty the fragment shader state. + if (depal != useShaderDepal || smoothed != useSmoothedShaderDepal) { useShaderDepal = depal; + useSmoothedShaderDepal = smoothed; Dirty(DIRTY_FRAGMENTSHADER_STATE); } } @@ -635,6 +637,7 @@ struct GPUStateCache { int spline_num_points_u; bool useShaderDepal; + bool useSmoothedShaderDepal; GEBufferFormat depalFramebufferFormat; u32 getRelativeAddress(u32 data) const; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 6c15264b1f..522d88ce5a 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -402,7 +402,7 @@ void TextureCacheVulkan::BindTexture(TexCacheEntry *entry) { curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey); imageView_ = entry->vkTex->GetImageView(); drawEngine_->SetDepalTexture(VK_NULL_HANDLE); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } void TextureCacheVulkan::ApplySamplingParams(const SamplerCacheKey &key) { diff --git a/assets/compat.ini b/assets/compat.ini index 55ddf688fc..974a6216de 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1276,10 +1276,3 @@ UCKS45048 = true UCJS18030 = true UCJS18047 = true NPJG00015 = true - -[SmoothedDepal] -# Test Drive Unlimited smoothed CLUT lookups. See comments in #13355 -ULET00386 = true -ULES00637 = true -ULKS46126 = true -ULUS10249 = true