diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 71 |
1 files changed, 58 insertions, 13 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index fa26eb8b0..3f2bf6294 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -11,7 +11,6 @@ #include <mutex> #include <numeric> #include <span> -#include <unordered_map> #include <vector> #include <boost/container/small_vector.hpp> @@ -22,7 +21,6 @@ #include "common/literals.h" #include "common/lru_cache.h" #include "common/microprofile.h" -#include "common/scope_exit.h" #include "common/settings.h" #include "core/memory.h" #include "video_core/buffer_cache/buffer_base.h" @@ -78,8 +76,9 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr u64 EXPECTED_MEMORY = 512_MiB; - static constexpr u64 CRITICAL_MEMORY = 1_GiB; + static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; + static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; + static constexpr s64 TARGET_THRESHOLD = 4_GiB; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -438,6 +437,8 @@ private: Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; + u64 minimum_memory = 0; + u64 critical_memory = 0; std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; }; @@ -453,11 +454,30 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); common_ranges.clear(); + + if (!runtime.CanReportMemoryUsage()) { + minimum_memory = DEFAULT_EXPECTED_MEMORY; + critical_memory = DEFAULT_CRITICAL_MEMORY; + return; + } + + const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); + const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; + const s64 min_spacing_critical = device_memory - 1_GiB; + const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_threshold) / 10; + const s64 min_vacancy_critical = (3 * mem_threshold) / 10; + minimum_memory = static_cast<u64>( + std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), + DEFAULT_EXPECTED_MEMORY)); + critical_memory = static_cast<u64>( + std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), + DEFAULT_CRITICAL_MEMORY)); } template <class P> void BufferCache<P>::RunGarbageCollector() { - const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; + const bool aggressive_gc = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; const auto clean_up = [this, &num_iterations](BufferId buffer_id) { @@ -488,7 +508,11 @@ void BufferCache<P>::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - if (total_used_memory >= EXPECTED_MEMORY) { + // If we can obtain the memory info, use it instead of the estimate. + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + if (total_used_memory >= minimum_memory) { RunGarbageCollector(); } ++frame_tick; @@ -1287,7 +1311,20 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { const GPUVAddr gpu_addr_begin = array.StartAddress(); const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1; const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); - const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); + u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); + if (address_size >= 64_MiB) { + // Reported vertex buffer size is very large, cap to mapped buffer size + GPUVAddr submapped_addr_end = gpu_addr_begin; + + const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)}; + if (ranges.size() > 0) { + const auto& [addr, size] = *ranges.begin(); + submapped_addr_end = addr + size; + } + + address_size = + std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin)); + } const u32 size = address_size; // TODO: Analyze stride and number of vertices if (array.enable == 0 || size == 0 || !cpu_addr) { vertex_buffers[index] = NULL_BINDING; @@ -1469,19 +1506,27 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu overlap_ids.push_back(overlap_id); overlap.Pick(); const VAddr overlap_cpu_addr = overlap.CpuAddr(); - if (overlap_cpu_addr < begin) { + const bool expands_left = overlap_cpu_addr < begin; + if (expands_left) { cpu_addr = begin = overlap_cpu_addr; } - end = std::max(end, overlap_cpu_addr + overlap.SizeBytes()); - + const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); + const bool expands_right = overlap_end > end; + if (overlap_end > end) { + end = overlap_end; + } stream_score += overlap.StreamScore(); if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) { // When this memory region has been joined a bunch of times, we assume it's being used // as a stream buffer. Increase the size to skip constantly recreating buffers. has_stream_leap = true; - begin -= PAGE_SIZE * 256; - cpu_addr = begin; - end += PAGE_SIZE * 256; + if (expands_right) { + begin -= PAGE_SIZE * 256; + cpu_addr = begin; + } + if (expands_left) { + end += PAGE_SIZE * 256; + } } } return OverlapResult{ |