From 030e6b3980aa5ce6069041c339d49d21d68ca73b Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 12 Dec 2023 17:15:52 -0500 Subject: video_core: use interval map for page count tracking --- src/tests/video_core/memory_tracker.cpp | 6 +- src/video_core/buffer_cache/word_manager.h | 2 +- src/video_core/rasterizer_accelerated.cpp | 97 ++++++++++++++-------------- src/video_core/rasterizer_accelerated.h | 27 ++------ src/video_core/rasterizer_interface.h | 2 +- src/video_core/shader_cache.cpp | 4 +- src/video_core/texture_cache/texture_cache.h | 10 +-- 7 files changed, 69 insertions(+), 79 deletions(-) diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp index 618793668..2dbff21af 100644 --- a/src/tests/video_core/memory_tracker.cpp +++ b/src/tests/video_core/memory_tracker.cpp @@ -23,13 +23,13 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE; class RasterizerInterface { public: - void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) { const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> Core::Memory::YUZU_PAGEBITS}; for (u64 page = page_start; page < page_end; ++page) { int& value = page_table[page]; - value += delta; + value += (cache ? 1 : -1); if (value < 0) { throw std::logic_error{"negative page"}; } @@ -546,4 +546,4 @@ TEST_CASE("MemoryTracker: Cached write downloads") { REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); memory_track->MarkRegionAsCpuModified(c, WORD); REQUIRE(rasterizer.Count() == 0); -} \ No newline at end of file +} diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index a336bde41..95b752055 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -473,7 +473,7 @@ private: VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; IteratePages(changed_bits, [&](size_t offset, size_t size) { rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, - size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); + size * BYTES_PER_PAGE, add_to_rasterizer); }); } diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index f200a650f..3abfd5ff3 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -3,6 +3,7 @@ #include +#include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "common/div_ceil.h" @@ -11,61 +12,63 @@ namespace VideoCore { +static constexpr u16 IdentityValue = 1; + using namespace Core::Memory; -RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) - : cached_pages(std::make_unique()), cpu_memory{cpu_memory_} {} +RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : map{}, cpu_memory{cpu_memory_} { + // We are tracking CPU memory, which cannot map more than 39 bits. + const VAddr start_address = 0; + const VAddr end_address = (1ULL << 39); + const IntervalType address_space_interval(start_address, end_address); + const auto value = std::make_pair(address_space_interval, IdentityValue); + + map.add(value); +} RasterizerAccelerated::~RasterizerAccelerated() = default; -void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - u64 uncache_begin = 0; - u64 cache_begin = 0; - u64 uncache_bytes = 0; - u64 cache_bytes = 0; - - std::atomic_thread_fence(std::memory_order_acquire); - const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); - for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { - std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); - - if (delta > 0) { - ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); - } else if (delta < 0) { - ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); - } else { - ASSERT_MSG(false, "Delta must be non-zero!"); - } +void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) { + // Align sizes. + addr = Common::AlignDown(addr, YUZU_PAGESIZE); + size = Common::AlignUp(size, YUZU_PAGESIZE); - // Adds or subtracts 1, as count is a unsigned 8-bit value - count.fetch_add(static_cast(delta), std::memory_order_release); - - // Assume delta is either -1 or 1 - if (count.load(std::memory_order::relaxed) == 0) { - if (uncache_bytes == 0) { - uncache_begin = page; - } - uncache_bytes += YUZU_PAGESIZE; - } else if (uncache_bytes > 0) { - cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, - false); - uncache_bytes = 0; - } - if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { - if (cache_bytes == 0) { - cache_begin = page; - } - cache_bytes += YUZU_PAGESIZE; - } else if (cache_bytes > 0) { - cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); - cache_bytes = 0; + // Declare the overall interval we are going to operate on. + const VAddr start_address = addr; + const VAddr end_address = addr + size; + const IntervalType modification_range(start_address, end_address); + + // Find the boundaries of where to iterate. + const auto lower = map.lower_bound(modification_range); + const auto upper = map.upper_bound(modification_range); + + // Iterate over the contained intervals. + for (auto it = lower; it != upper; it++) { + // Intersect interval range with modification range. + const auto current_range = modification_range & it->first; + + // Calculate the address and size to operate over. + const auto current_addr = current_range.lower(); + const auto current_size = current_range.upper() - current_addr; + + // Get the current value of the range. + const auto value = it->second; + + if (cache && value == IdentityValue) { + // If we are going to cache, and the value is not yet referenced, then cache this range. + cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, true); + } else if (!cache && value == IdentityValue + 1) { + // If we are going to uncache, and this is the last reference, then uncache this range. + cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, false); } } - if (uncache_bytes > 0) { - cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); - } - if (cache_bytes > 0) { - cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); + + // Update the set. + const auto value = std::make_pair(modification_range, IdentityValue); + if (cache) { + map.add(value); + } else { + map.subtract(value); } } diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index e6c0ea87a..cd1c706de 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -3,8 +3,7 @@ #pragma once -#include -#include +#include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" @@ -21,28 +20,16 @@ public: explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); ~RasterizerAccelerated() override; - void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; + void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) override; private: - class CacheEntry final { - public: - CacheEntry() = default; + using PageIndex = VAddr; + using PageReferenceCount = u16; - std::atomic_uint16_t& Count(std::size_t page) { - return values[page & 3]; - } + using IntervalMap = boost::icl::interval_map; + using IntervalType = IntervalMap::interval_type; - const std::atomic_uint16_t& Count(std::size_t page) const { - return values[page & 3]; - } - - private: - std::array values{}; - }; - static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); - - using CachedPages = std::array; - std::unique_ptr cached_pages; + IntervalMap map; Core::Memory::Memory& cpu_memory; }; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index af1469147..fd42d26b5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -162,7 +162,7 @@ public: } /// Increase/decrease the number of object in pages touching the specified region - virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} + virtual void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {} /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index e81cd031b..a109f9cbe 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -132,7 +132,7 @@ void ShaderCache::Register(std::unique_ptr data, VAddr addr, size_t storage.push_back(std::move(data)); - rasterizer.UpdatePagesCachedCount(addr, size, 1); + rasterizer.UpdatePagesCachedCount(addr, size, true); } void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { @@ -209,7 +209,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { const VAddr addr = entry->addr_start; const size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); + rasterizer.UpdatePagesCachedCount(addr, size, false); } void ShaderCache::RemoveShadersFromStorage(std::span removed_shaders) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d5a1709f..d7941f6a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -2080,7 +2080,7 @@ void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, true); return; } if (True(image.flags & ImageFlagBits::Registered)) { @@ -2091,13 +2091,13 @@ void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { const auto& map = slot_map_views[map_view_id]; const VAddr cpu_addr = map.cpu_addr; const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, true); } return; } ForEachSparseSegment(image, [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, true); }); } @@ -2106,7 +2106,7 @@ void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, false); return; } ASSERT(True(image.flags & ImageFlagBits::Registered)); @@ -2117,7 +2117,7 @@ void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { const auto& map = slot_map_views[map_view_id]; const VAddr cpu_addr = map.cpu_addr; const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, false); } } -- cgit v1.2.3 From 2a3f84aaf28cb526121e016a9e6699fce7223407 Mon Sep 17 00:00:00 2001 From: Liam Date: Thu, 14 Dec 2023 21:41:28 -0500 Subject: video_core: lock interval map update --- src/video_core/rasterizer_accelerated.cpp | 2 ++ src/video_core/rasterizer_accelerated.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 3abfd5ff3..3c9477f6e 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -29,6 +29,8 @@ RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : map{}, cpu_m RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) { + std::scoped_lock lk{map_lock}; + // Align sizes. addr = Common::AlignDown(addr, YUZU_PAGESIZE); size = Common::AlignUp(size, YUZU_PAGESIZE); diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index cd1c706de..f1968f186 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include "common/common_types.h" @@ -30,6 +31,7 @@ private: using IntervalType = IntervalMap::interval_type; IntervalMap map; + std::mutex map_lock; Core::Memory::Memory& cpu_memory; }; -- cgit v1.2.3