Allow disabling the freeing of texture guest memory

This helps to prevent issues that result from the overlapping of buffer and texture data, by only ever syncing back textures if they are actually used as RTs, which are much less likely to overlap buffers.
This commit is contained in:
Billy Laws 2023-02-20 17:45:54 +00:00
parent 5e8cdfda92
commit 7150ce0d1d
8 changed files with 47 additions and 7 deletions

View file

@ -45,6 +45,7 @@ namespace skyline {
useDirectMemoryImport = ktSettings.GetBool("useDirectMemoryImport");
forceMaxGpuClocks = ktSettings.GetBool("forceMaxGpuClocks");
disableShaderCache = ktSettings.GetBool("disableShaderCache");
freeGuestTextureMemory = ktSettings.GetBool("freeGuestTextureMemory");
enableFastGpuReadbackHack = ktSettings.GetBool("enableFastGpuReadbackHack");
enableFastReadbackWrites = ktSettings.GetBool("enableFastReadbackWrites");
disableSubgroupShuffle = ktSettings.GetBool("disableSubgroupShuffle");

View file

@ -77,6 +77,7 @@ namespace skyline {
Setting<u32> executorFlushThreshold; //!< Number of commands that need to accumulate before they're flushed to the GPU
Setting<bool> useDirectMemoryImport; //!< If buffer emulation should be done by importing guest buffer mappings
Setting<bool> forceMaxGpuClocks; //!< If the GPU should be forced to run at maximum clocks
Setting<bool> freeGuestTextureMemory; //!< If guest texture memory should be freed when the owning texture is GPU dirty
// Hacks
Setting<bool> enableFastGpuReadbackHack; //!< If the CPU texture readback skipping hack should be used

View file

@ -225,7 +225,7 @@ namespace skyline::gpu {
return true; // If the texture is already CPU dirty or we can transition it to being CPU dirty then we don't need to do anything
}
if (texture->accumulatedGuestWaitTime > SkipReadbackHackWaitTimeThreshold && *texture->gpu.state.settings->enableFastGpuReadbackHack) {
if (texture->accumulatedGuestWaitTime > SkipReadbackHackWaitTimeThreshold && *texture->gpu.state.settings->enableFastGpuReadbackHack && !texture->memoryFreed) {
texture->dirtyState = DirtyState::Clean;
return true;
}
@ -494,6 +494,14 @@ namespace skyline::gpu {
}
}
/**
 * Releases the guest-side mirror of this texture and records that fact in `memoryFreed`.
 * Nothing happens unless the `freeGuestTextureMemory` setting is enabled, the guest format
 * equals the backing format, and the fast GPU readback hack is not in effect for this texture.
 */
void Texture::FreeGuest() {
    // The user has opted out of freeing guest texture memory altogether
    if (!*gpu.state.settings->freeGuestTextureMemory)
        return;

    // With a mismatching backing format the guest copy cannot be dropped, as the texture data would be lost on the guest side
    if (!(guest->format == format))
        return;

    // While fast readback is active for this texture the guest copy has to stay resident
    const bool fastReadbackActive{accumulatedGuestWaitTime > SkipReadbackHackWaitTimeThreshold && *gpu.state.settings->enableFastGpuReadbackHack};
    if (fastReadbackActive)
        return;

    gpu.state.process->memory.FreeMemory(mirror);
    memoryFreed = true;
}
Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 levelCount, u32 layerCount, vk::SampleCountFlagBits sampleCount)
: gpu(gpu),
backing(std::move(backing)),
@ -721,6 +729,10 @@ namespace skyline::gpu {
if (!guest)
return;
// FIXME (TEXMAN): This should really be tracked on the texture usage side
if (!*gpu.state.settings->freeGuestTextureMemory && !everUsedAsRt)
gpuDirty = false;
TRACE_EVENT("gpu", "Texture::SynchronizeHost");
{
std::scoped_lock lock{stateMutex};
@ -728,7 +740,7 @@ namespace skyline::gpu {
// If a texture is Clean then we can just transition it to being GPU dirty and retrap it
dirtyState = DirtyState::GpuDirty;
gpu.state.nce->TrapRegions(*trapHandle, false);
gpu.state.process->memory.FreeMemory(mirror);
FreeGuest();
return;
} else if (dirtyState != DirtyState::CpuDirty) {
return; // If the texture has not been modified on the CPU, there is no need to synchronize it
@ -755,8 +767,8 @@ namespace skyline::gpu {
{
std::scoped_lock lock{stateMutex};
if (dirtyState != DirtyState::CpuDirty && gpuDirty)
gpu.state.process->memory.FreeMemory(mirror); // All data can be paged out from the guest as the guest mirror won't be used
if (dirtyState == DirtyState::GpuDirty)
FreeGuest();
}
}
@ -765,13 +777,16 @@ namespace skyline::gpu {
return;
TRACE_EVENT("gpu", "Texture::SynchronizeHostInline");
// FIXME (TEXMAN): This should really be tracked on the texture usage side
if (!*gpu.state.settings->freeGuestTextureMemory && !everUsedAsRt)
gpuDirty = false;
{
std::scoped_lock lock{stateMutex};
if (gpuDirty && dirtyState == DirtyState::Clean) {
dirtyState = DirtyState::GpuDirty;
gpu.state.nce->TrapRegions(*trapHandle, false);
gpu.state.process->memory.FreeMemory(mirror);
FreeGuest();
return;
} else if (dirtyState != DirtyState::CpuDirty) {
return;
@ -792,8 +807,8 @@ namespace skyline::gpu {
{
std::scoped_lock lock{stateMutex};
if (dirtyState != DirtyState::CpuDirty && gpuDirty)
gpu.state.process->memory.FreeMemory(mirror); // All data can be paged out from the guest as the guest mirror won't be used
if (dirtyState == DirtyState::GpuDirty)
FreeGuest();
}
}
@ -815,6 +830,7 @@ namespace skyline::gpu {
}
dirtyState = cpuDirty ? DirtyState::CpuDirty : DirtyState::Clean;
memoryFreed = false;
}
if (layout == vk::ImageLayout::eUndefined || format != guest->format)
@ -1008,6 +1024,7 @@ namespace skyline::gpu {
lastRenderPassIndex = renderPassIndex;
if (renderPassUsage == texture::RenderPassUsage::RenderTarget) {
everUsedAsRt = true;
pendingStageMask = vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eTessellationControlShader |
vk::PipelineStageFlagBits::eTessellationEvaluationShader |
@ -1030,6 +1047,9 @@ namespace skyline::gpu {
}
void Texture::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
if (!guest)
return;
readStageMask |= dstStage;
if (!(pendingStageMask & dstStage))

View file

@ -389,6 +389,7 @@ namespace skyline::gpu {
CpuDirty, //!< The CPU mappings have been modified but the GPU texture is not up to date
GpuDirty, //!< The GPU texture has been modified but the CPU mappings have not been updated
} dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU texture
bool memoryFreed{}; //!< If the guest backing memory has been freed
std::recursive_mutex stateMutex; //!< Synchronizes access to the dirty state
/**
@ -410,6 +411,7 @@ namespace skyline::gpu {
u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture
texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass
bool everUsedAsRt{}; //!< If this texture has ever been used as a rendertarget
vk::PipelineStageFlags pendingStageMask{}; //!< List of pipeline stages that are yet to be flushed for reads since the last time this texture was used as an RT
vk::PipelineStageFlags readStageMask{}; //!< Set of pipeline stages that this texture has been read in since it was last used as an RT
@ -444,6 +446,12 @@ namespace skyline::gpu {
*/
void CopyToGuest(u8 *hostBuffer);
/**
* @brief Frees the guest side copy of the texture
* @note `stateMutex` must be locked when calling this function
* @note This is a no-op unless the `freeGuestTextureMemory` setting is enabled, the guest format matches the backing format and the fast GPU readback hack isn't active for this texture
* @note `memoryFreed` is set when the guest memory is actually freed
*/
void FreeGuest();
/**
* @return A vector of all the buffer image copies that need to be done for every aspect of every level of every layer of the texture
*/

View file

@ -48,6 +48,7 @@ class EmulationSettings private constructor(context : Context, prefName : String
var executorFlushThreshold by sharedPreferences(context, 256, prefName = prefName)
var useDirectMemoryImport by sharedPreferences(context, false, prefName = prefName)
var forceMaxGpuClocks by sharedPreferences(context, false, prefName = prefName)
var freeGuestTextureMemory by sharedPreferences(context, true, prefName = prefName)
var disableShaderCache by sharedPreferences(context, false, prefName = prefName)
// Hacks

View file

@ -35,6 +35,7 @@ data class NativeSettings(
var executorFlushThreshold : Int,
var useDirectMemoryImport : Boolean,
var forceMaxGpuClocks : Boolean,
var freeGuestTextureMemory : Boolean,
var disableShaderCache : Boolean,
// Hacks
@ -60,6 +61,7 @@ data class NativeSettings(
pref.executorFlushThreshold,
pref.useDirectMemoryImport,
pref.forceMaxGpuClocks,
pref.freeGuestTextureMemory,
pref.disableShaderCache,
pref.enableFastGpuReadbackHack,
pref.enableFastReadbackWrites,

View file

@ -110,6 +110,8 @@
<string name="force_max_gpu_clocks">Force Maximum GPU Clocks</string>
<string name="force_max_gpu_clocks_desc">Forces the GPU to run at its maximum possible clock speed (May cause excessive heating and power usage)</string>
<string name="force_max_gpu_clocks_desc_unsupported">Your device does not support forcing maximum GPU clocks</string>
<string name="free_guest_texture_memory">Free Guest Texture Memory</string>
<string name="free_guest_texture_memory_desc">Allows guest texture data to be freed from memory when unneeded (Can rarely cause crashes)</string>
<string name="shader_cache">Disable Shader Cache</string>
<string name="shader_cache_disabled">Cached shaders won\'t be loaded, will cause stutters</string>
<string name="shader_cache_enabled">Cached shaders will be loaded, can heavily reduce stuttering</string>

View file

@ -123,6 +123,11 @@
android:summary="@string/force_max_gpu_clocks_desc"
app:key="force_max_gpu_clocks"
app:title="@string/force_max_gpu_clocks" />
<!-- The default mirrors EmulationSettings.freeGuestTextureMemory (true) so the checkbox shows the effective value before the preference is first persisted -->
<CheckBoxPreference
    android:defaultValue="true"
    android:summary="@string/free_guest_texture_memory_desc"
    app:key="free_guest_texture_memory"
    app:title="@string/free_guest_texture_memory" />
<CheckBoxPreference
android:defaultValue="false"
android:summaryOff="@string/shader_cache_enabled"