diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2019-07-19 16:50:40 +0200 |
---|---|---|
committer | FernandoS27 <fsahmkow27@gmail.com> | 2019-08-21 18:14:22 +0200 |
commit | 862bec001b7ada13ba0e97f95d6ad108ae8a8d0c (patch) | |
tree | d366f7768d0acd4cbe1514a57b9fddcfb4c79eaa /src/video_core/buffer_cache | |
parent | Merge pull request #2748 from FernandoS27/align-memory (diff) | |
download | yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.gz yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.bz2 yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.lz yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.xz yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.zst yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.zip |
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- | src/video_core/buffer_cache/buffer_block.h | 78 | ||||
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 372 | ||||
-rw-r--r-- | src/video_core/buffer_cache/map_interval.h | 48 |
3 files changed, 498 insertions, 0 deletions
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h new file mode 100644 index 000000000..2c739a586 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_block.h @@ -0,0 +1,78 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_set> +#include <utility> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "video_core/gpu.h" + +namespace VideoCommon { + +class BufferBlock { +public: + bool Overlaps(const CacheAddr start, const CacheAddr end) const { + return (cache_addr < end) && (cache_addr_end > start); + } + + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) { + return (cache_addr <= other_start && other_end <= cache_addr_end); + } + + u8* GetWritableHostPtr() const { + return FromCacheAddr(cache_addr); + } + + u8* GetWritableHostPtr(std::size_t offset) const { + return FromCacheAddr(cache_addr + offset); + } + + std::size_t GetOffset(const CacheAddr in_addr) { + return static_cast<std::size_t>(in_addr - cache_addr); + } + + CacheAddr GetCacheAddr() const { + return cache_addr; + } + + CacheAddr GetCacheAddrEnd() const { + return cache_addr_end; + } + + void SetCacheAddr(const CacheAddr new_addr) { + cache_addr = new_addr; + cache_addr_end = new_addr + size; + } + + std::size_t GetSize() const { + return size; + } + + void SetEpoch(u64 new_epoch) { + epoch = new_epoch; + } + + u64 GetEpoch() { + return epoch; + } + +protected: + explicit BufferBlock(CacheAddr cache_addr,const std::size_t size) + : size{size} { + SetCacheAddr(cache_addr); + } + ~BufferBlock() = default; + +private: + CacheAddr cache_addr{}; + CacheAddr cache_addr_end{}; + u64 pages{}; + std::size_t size{}; + u64 epoch{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..6c467eb80 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -0,0 +1,372 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <mutex> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/buffer_cache/map_interval.h" +#include "video_core/buffer_cache/buffer_block.h" +#include "video_core/memory_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template <typename TBuffer, typename TBufferType, typename StreamBuffer> +class BufferCache { +public: + using BufferInfo = std::pair<const TBufferType*, u64>; + + BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + bool is_written = false) { + std::lock_guard lock{mutex}; + + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return {GetEmptyBuffer(size), 0}; + } + const auto cache_addr = ToCacheAddr(host_ptr); + + auto block = GetBlock(cache_addr, size); + MapAddress(block, gpu_addr, cache_addr, size, is_written); + + const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); + + return {ToHandle(block), offset}; + } + + /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. + BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4) { + std::lock_guard lock{mutex}; + return StreamBufferUpload(raw_pointer, size, alignment); + } + + void Map(std::size_t max_size) { + std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + buffer_offset = buffer_offset_base; + } + + /// Finishes the upload stream, returns true on bindings invalidation. + bool Unmap() { + stream_buffer->Unmap(buffer_offset - buffer_offset_base); + return std::exchange(invalidated, false); + } + + void TickFrame() { + ++epoch; + while (!pending_destruction.empty()) { + if (pending_destruction.front()->GetEpoch() + 1 > epoch) { + break; + } + pending_destruction.pop_front(); + } + } + + /// Write any cached resources overlapping the specified region back to memory + void FlushRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + + // TODO + } + + /// Mark the specified region as being invalidated + void InvalidateRegion(CacheAddr addr, u64 size) { + std::lock_guard lock{mutex}; + + std::vector<MapInterval> objects = GetMapsInRange(addr, size); + for (auto& object : objects) { + Unregister(object); + } + } + + virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; + +protected: + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + std::unique_ptr<StreamBuffer> stream_buffer) + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, + stream_buffer_handle{this->stream_buffer->GetHandle()} {} + + ~BufferCache() = default; + + virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; + + virtual void WriteBarrier() = 0; + + virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; + + virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + const u8* data) = 0; + + virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + u8* data) = 0; + + virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) = 0; + + /// Register an object into the cache + void Register(const MapInterval& new_interval, const GPUVAddr gpu_addr) { + const CacheAddr cache_ptr = new_interval.start; + const std::size_t size = new_interval.end - new_interval.start; + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cache_ptr || !cpu_addr) { + LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + const IntervalType interval{new_interval.start, new_interval.end}; + mapped_addresses.insert(interval); + map_storage[new_interval] = MapInfo{gpu_addr, *cpu_addr}; + + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + } + + /// Unregisters an object from the cache + void Unregister(const MapInterval& interval) { + const MapInfo info = map_storage[interval]; + const std::size_t size = interval.end - interval.start; + rasterizer.UpdatePagesCachedCount(info.cpu_addr, size, -1); + const IntervalType delete_interval{interval.start, interval.end}; + mapped_addresses.erase(delete_interval); + map_storage.erase(interval); + } + +private: + void MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const CacheAddr cache_addr, + const std::size_t size, bool is_written) { + + std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); + if (overlaps.empty()) { + const CacheAddr cache_addr_end = cache_addr + size; + MapInterval new_interval{cache_addr, cache_addr_end}; + if (!is_written) { + u8* host_ptr = FromCacheAddr(cache_addr); + UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); + } + Register(new_interval, gpu_addr); + return; + } + + if (overlaps.size() == 1) { + MapInterval current_map = overlaps[0]; + const CacheAddr cache_addr_end = cache_addr + size; + if (current_map.IsInside(cache_addr, cache_addr_end)) { + return; + } + const CacheAddr new_start = std::min(cache_addr, current_map.start); + const CacheAddr new_end = std::max(cache_addr_end, current_map.end); + const GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; + const std::size_t new_size = static_cast<std::size_t>(new_end - new_start); + MapInterval new_interval{new_start, new_end}; + const std::size_t offset = current_map.start - new_start; + const std::size_t size = current_map.end - current_map.start; + // Upload the remaining data + if (!is_written) { + u8* host_ptr = FromCacheAddr(new_start); + if (new_start == cache_addr && new_end == cache_addr_end) { + std::size_t first_size = current_map.start - new_start; + if (first_size > 0) { + UploadBlockData(block, block->GetOffset(new_start), first_size, host_ptr); + } + + std::size_t second_size = new_end - current_map.end; + if (second_size > 0) { + u8* host_ptr2 = FromCacheAddr(current_map.end); + UploadBlockData(block, block->GetOffset(current_map.end), second_size, + host_ptr2); + } + } else { + if (new_start == cache_addr) { + std::size_t second_size = new_end - current_map.end; + if (second_size > 0) { + u8* host_ptr2 = FromCacheAddr(current_map.end); + UploadBlockData(block, block->GetOffset(current_map.end), second_size, + host_ptr2); + } + } else { + std::size_t first_size = current_map.start - new_start; + if (first_size > 0) { + UploadBlockData(block, block->GetOffset(new_start), first_size, host_ptr); + } + } + } + } + Unregister(current_map); + Register(new_interval, new_gpu_addr); + } else { + // Calculate new buffer parameters + GPUVAddr new_gpu_addr = gpu_addr; + CacheAddr start = cache_addr; + CacheAddr end = cache_addr + size; + for (auto& overlap : overlaps) { + start = std::min(overlap.start, start); + end = std::max(overlap.end, end); + } + new_gpu_addr = gpu_addr + start - cache_addr; + MapInterval new_interval{start, end}; + for (auto& overlap : overlaps) { + Unregister(overlap); + } + std::size_t new_size = end - start; + if (!is_written) { + u8* host_ptr = FromCacheAddr(start); + UploadBlockData(block, block->GetOffset(start), new_size, host_ptr); + } + Register(new_interval, new_gpu_addr); + } + } + + std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { + if (size == 0) { + return {}; + } + + std::vector<MapInterval> objects{}; + const IntervalType interval{addr, addr + size}; + for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { + objects.emplace_back(pair.lower(), pair.upper()); + } + + return objects; + } + + /// Returns a ticks counter used for tracking when cached objects were last modified + u64 GetModifiedTicks() { + return ++modified_ticks; + } + + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, + std::size_t alignment) { + AlignBuffer(alignment); + const std::size_t uploaded_offset = buffer_offset; + std::memcpy(buffer_ptr, raw_pointer, size); + + buffer_ptr += size; + buffer_offset += size; + return {&stream_buffer_handle, uploaded_offset}; + } + + void AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; + } + + TBuffer EnlargeBlock(TBuffer buffer) { + const std::size_t old_size = buffer->GetSize(); + const std::size_t new_size = old_size + block_page_size; + const CacheAddr cache_addr = buffer->GetCacheAddr(); + TBuffer new_buffer = CreateBlock(cache_addr, new_size); + CopyBlock(buffer, new_buffer, 0, 0, old_size); + buffer->SetEpoch(epoch); + pending_destruction.push_back(buffer); + const CacheAddr cache_addr_end = cache_addr + new_size - 1; + u64 page_start = cache_addr >> block_page_bits; + const u64 page_end = cache_addr_end >> block_page_bits; + while (page_start <= page_end) { + blocks[page_start] = new_buffer; + ++page_start; + } + return new_buffer; + } + + TBuffer MergeBlocks(TBuffer first, TBuffer second) { + const std::size_t size_1 = first->GetSize(); + const std::size_t size_2 = second->GetSize(); + const CacheAddr first_addr = first->GetCacheAddr(); + const CacheAddr second_addr = second->GetCacheAddr(); + const CacheAddr new_addr = std::min(first_addr, second_addr); + const std::size_t new_size = size_1 + size_2; + TBuffer new_buffer = CreateBlock(new_addr, new_size); + CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); + CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); + first->SetEpoch(epoch); + second->SetEpoch(epoch); + pending_destruction.push_back(first); + pending_destruction.push_back(second); + const CacheAddr cache_addr_end = new_addr + new_size - 1; + u64 page_start = new_addr >> block_page_bits; + const u64 page_end = cache_addr_end >> block_page_bits; + while (page_start <= page_end) { + blocks[page_start] = new_buffer; + ++page_start; + } + return new_buffer; + } + + TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { + TBuffer found{}; + const CacheAddr cache_addr_end = cache_addr + size - 1; + u64 page_start = cache_addr >> block_page_bits; + const u64 page_end = cache_addr_end >> block_page_bits; + const u64 num_pages = page_end - page_start + 1; + while (page_start <= page_end) { + auto it = blocks.find(page_start); + if (it == blocks.end()) { + if (found) { + found = EnlargeBlock(found); + } else { + const CacheAddr start_addr = (page_start << block_page_bits); + found = CreateBlock(start_addr, block_page_size); + blocks[page_start] = found; + } + } else { + if (found) { + if (found == it->second) { + ++page_start; + continue; + } + found = MergeBlocks(found, it->second); + } else { + found = it->second; + } + } + ++page_start; + } + return found; + } + + std::unique_ptr<StreamBuffer> stream_buffer; + TBufferType stream_buffer_handle{}; + + bool invalidated = false; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; + + using IntervalCache = boost::icl::interval_set<CacheAddr>; + using IntervalType = typename IntervalCache::interval_type; + IntervalCache mapped_addresses{}; + std::unordered_map<MapInterval, MapInfo> map_storage; + + static constexpr u64 block_page_bits{24}; + static constexpr u64 block_page_size{1 << block_page_bits}; + std::unordered_map<u64, TBuffer> blocks; + + std::list<TBuffer> pending_destruction; + u64 epoch{}; + u64 modified_ticks{}; + VideoCore::RasterizerInterface& rasterizer; + Core::System& system; + std::recursive_mutex mutex; +}; + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h new file mode 100644 index 000000000..652a35dcd --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.h @@ -0,0 +1,48 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <boost/functional/hash.hpp> +#include "common/common_types.h" +#include "video_core/gpu.h" + +namespace VideoCommon { + +struct MapInterval { + MapInterval(const CacheAddr start, const CacheAddr end) : start{start}, end{end} {} + CacheAddr start; + CacheAddr end; + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) { + return (start <= other_start && other_end <= end); + } + + bool operator==(const MapInterval& rhs) const { + return std::tie(start, end) == std::tie(rhs.start, rhs.end); + } + + bool operator!=(const MapInterval& rhs) const { + return !operator==(rhs); + } +}; + +struct MapInfo { + GPUVAddr gpu_addr; + VAddr cpu_addr; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::MapInterval> { + std::size_t operator()(const VideoCommon::MapInterval& k) const noexcept { + std::size_t a = std::hash<CacheAddr>()(k.start); + boost::hash_combine(a, std::hash<CacheAddr>()(k.end)); + return a; + } +}; + +} // namespace std |