author: Fernando Sahmkow <fsahmkow27@gmail.com> 2019-07-19 16:50:40 +0200
committer: FernandoS27 <fsahmkow27@gmail.com> 2019-08-21 18:14:22 +0200
commit: 862bec001b7ada13ba0e97f95d6ad108ae8a8d0c (patch)
tree: d366f7768d0acd4cbe1514a57b9fddcfb4c79eaa /src/video_core/buffer_cache
parent: Merge pull request #2748 from FernandoS27/align-memory (diff)
download: yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar
yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.gz
yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.bz2
yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.lz
yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.xz
yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.zst
yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.zip
3 files changed, 498 insertions, 0 deletions
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
new file mode 100644
index 000000000..2c739a586
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -0,0 +1,78 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_set>
+#include <utility>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+namespace VideoCommon {
+
+/// Describes the range of cache addresses covered by one buffer block.
+///
+/// The block spans the half-open range [GetCacheAddr(), GetCacheAddrEnd()). The constructor and
+/// destructor are protected, so the class can only be used through a derived type.
+class BufferBlock {
+public:
+    /// Returns true when the half-open range [start, end) intersects this block.
+    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
+        return (cache_addr < end) && (cache_addr_end > start);
+    }
+
+    /// Returns true when [other_start, other_end) lies entirely within this block.
+    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+        return cache_addr <= other_start && other_end <= cache_addr_end;
+    }
+
+    /// Returns the host pointer that corresponds to the first address of the block.
+    u8* GetWritableHostPtr() const {
+        return FromCacheAddr(cache_addr);
+    }
+
+    /// Returns the host pointer that corresponds to `offset` bytes past the start of the block.
+    u8* GetWritableHostPtr(std::size_t offset) const {
+        return FromCacheAddr(cache_addr + offset);
+    }
+
+    /// Returns the distance in bytes from the start of the block to in_addr.
+    /// No range check is performed; an address below the block start wraps around.
+    std::size_t GetOffset(const CacheAddr in_addr) const {
+        return static_cast<std::size_t>(in_addr - cache_addr);
+    }
+
+    /// Returns the first cache address covered by the block.
+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
+    }
+
+    /// Returns the address one past the last byte covered by the block.
+    CacheAddr GetCacheAddrEnd() const {
+        return cache_addr_end;
+    }
+
+    /// Moves the block to new_addr; the end address follows from the unchanged size.
+    void SetCacheAddr(const CacheAddr new_addr) {
+        cache_addr = new_addr;
+        cache_addr_end = new_addr + size;
+    }
+
+    /// Returns the size of the block in bytes.
+    std::size_t GetSize() const {
+        return size;
+    }
+
+    /// Stores an epoch value on the block.
+    void SetEpoch(u64 new_epoch) {
+        epoch = new_epoch;
+    }
+
+    /// Returns the epoch value last stored with SetEpoch (zero if never set).
+    u64 GetEpoch() const {
+        return epoch;
+    }
+
+protected:
+    /// Creates a block of `size` bytes starting at cache_addr.
+    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
+        // size must be set before SetCacheAddr, which derives the end address from it.
+        SetCacheAddr(cache_addr);
+    }
+    ~BufferBlock() = default;
+
+private:
+    CacheAddr cache_addr{};
+    CacheAddr cache_addr_end{};
+    std::size_t size{};
+    u64 epoch{};
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
new file mode 100644
index 000000000..6c467eb80
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -0,0 +1,372 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <boost/icl/interval_set.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/buffer_cache/buffer_block.h"
+#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace VideoCommon {
+
+template <typename TBuffer, typename TBufferType, typename StreamBuffer>
+class BufferCache {
+public:
+    using BufferInfo = std::pair<const TBufferType*, u64>;
+
+    /// Makes the guest GPU memory range [gpu_addr, gpu_addr + size) available in a cached block.
+    ///
+    /// @param gpu_addr   GPU virtual address of the data.
+    /// @param size       Size of the range in bytes.
+    /// @param alignment  Not used by this function.
+    /// @param is_written Forwarded to MapAddress; when true, no host data is uploaded for the
+    ///                   range.
+    /// @returns The handle of the block holding the range together with the offset of the data
+    ///          inside that block, or {GetEmptyBuffer(size), 0} when gpu_addr has no host pointer.
+    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+                            bool is_written = false) {
+        std::lock_guard lock{mutex};
+
+        const auto host_ptr = system.GPU().MemoryManager().GetPointer(gpu_addr);
+        if (!host_ptr) {
+            return {GetEmptyBuffer(size), 0};
+        }
+
+        const auto cache_addr = ToCacheAddr(host_ptr);
+        auto block = GetBlock(cache_addr, size);
+        MapAddress(block, gpu_addr, cache_addr, size, is_written);
+
+        const auto offset_in_block = static_cast<u64>(block->GetOffset(cache_addr));
+        return {ToHandle(block), offset_in_block};
+    }
+
+    /// Copies `size` bytes from raw_pointer into the stream buffer, aligning the destination
+    /// offset to `alignment` first. Returns the stream buffer handle and the offset at which the
+    /// data was placed. The copy goes through the pointer that Map() set up.
+    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
+                                std::size_t alignment = 4) {
+        std::lock_guard lock{mutex};
+        const BufferInfo uploaded = StreamBufferUpload(raw_pointer, size, alignment);
+        return uploaded;
+    }
+
+    /// Maps up to max_size bytes of the stream buffer and resets the write cursor to the start
+    /// of the mapped region.
+    void Map(std::size_t max_size) {
+        constexpr auto map_alignment = 4;
+        std::tie(buffer_ptr, buffer_offset_base, invalidated) =
+            stream_buffer->Map(max_size, map_alignment);
+        buffer_offset = buffer_offset_base;
+    }
+
+    /// Ends the current stream buffer mapping, reporting how many bytes were written since Map().
+    /// Returns the invalidation flag recorded by Map() and clears it.
+    bool Unmap() {
+        const u64 bytes_written = buffer_offset - buffer_offset_base;
+        stream_buffer->Unmap(bytes_written);
+
+        const bool was_invalidated = invalidated;
+        invalidated = false;
+        return was_invalidated;
+    }
+
+    /// Advances the epoch, then drops blocks from the front of the pending-destruction list for
+    /// as long as their recorded epoch is older than the new epoch.
+    void TickFrame() {
+        ++epoch;
+        while (!pending_destruction.empty() &&
+               pending_destruction.front()->GetEpoch() + 1 <= epoch) {
+            pending_destruction.pop_front();
+        }
+    }
+
+    /// Intended to write cached data overlapping [addr, addr + size) back to memory
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        // TODO: not implemented yet, this currently only takes the cache lock
+    }
+
+    /// Unregisters every tracked map that overlaps [addr, addr + size).
+    void InvalidateRegion(CacheAddr addr, u64 size) {
+        std::lock_guard lock{mutex};
+
+        for (const MapInterval& stale_map : GetMapsInRange(addr, size)) {
+            Unregister(stale_map);
+        }
+    }
+
+    virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; // Used for unmapped addresses
+
+protected:
+    /// Stores the rasterizer and system references, takes ownership of the stream buffer and
+    /// caches the handle it reports.
+    ///
+    /// The initializers are listed in member declaration order, which is the order they run in:
+    /// stream_buffer_handle reads this->stream_buffer and therefore must come after it.
+    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                         std::unique_ptr<StreamBuffer> stream_buffer)
+        : stream_buffer{std::move(stream_buffer)},
+          stream_buffer_handle{this->stream_buffer->GetHandle()}, rasterizer{rasterizer},
+          system{system} {}
+
+    /// Protected and non-virtual: the cache is destroyed through the derived type only.
+    ~BufferCache() = default;
+
+    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; // Handle placed in BufferInfo
+
+    virtual void WriteBarrier() = 0; // Not called anywhere in this header
+
+    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; // Makes a new block
+
+    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
+                                 const u8* data) = 0; // Called with host data for a block offset
+
+    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
+                                   u8* data) = 0; // Not called anywhere in this header
+
+    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
+                           std::size_t dst_offset, std::size_t size) = 0; // Block-to-block copy
+
+    /// Starts tracking new_interval: records it in the interval set, stores its GPU/CPU
+    /// addresses and increments the cached-page count of the CPU range. Logs and does nothing
+    /// when the interval starts at address zero or gpu_addr has no CPU address.
+    void Register(const MapInterval& new_interval, const GPUVAddr gpu_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        const bool has_cache_addr = new_interval.start != 0;
+        if (!has_cache_addr || !cpu_addr) {
+            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
+                         gpu_addr);
+            return;
+        }
+
+        mapped_addresses.insert(IntervalType{new_interval.start, new_interval.end});
+        map_storage.insert_or_assign(new_interval, MapInfo{gpu_addr, *cpu_addr});
+
+        const std::size_t interval_size = new_interval.end - new_interval.start;
+        rasterizer.UpdatePagesCachedCount(*cpu_addr, interval_size, 1);
+    }
+
+    /// Stops tracking `interval`: decrements the cached-page count of its CPU range and removes
+    /// it from both the address storage and the interval set.
+    ///
+    /// An interval with no stored addresses is only removed from the interval set. Looking it up
+    /// with operator[] would insert a zeroed MapInfo and then decrement page counts at CPU
+    /// address 0, so the lookup uses find() and the accounting is skipped in that case.
+    void Unregister(const MapInterval& interval) {
+        const auto stored = map_storage.find(interval);
+        if (stored != map_storage.end()) {
+            const std::size_t size = interval.end - interval.start;
+            rasterizer.UpdatePagesCachedCount(stored->second.cpu_addr, size, -1);
+            map_storage.erase(stored);
+        } else {
+            LOG_CRITICAL(HW_GPU, "Tried to unregister an untracked buffer map at 0x{:016x}",
+                         interval.start);
+        }
+
+        const IntervalType delete_interval{interval.start, interval.end};
+        mapped_addresses.erase(delete_interval);
+    }
+
+private:
+    /// Registers [cache_addr, cache_addr + size) as a mapped interval of `block`, merging it
+    /// with any already registered intervals it overlaps.
+    ///
+    /// Unless is_written is set, host data is uploaded into the block:
+    ///  - no overlap: the whole requested range;
+    ///  - one overlap: only the parts of the merged range outside the existing interval
+    ///    (nothing at all when the request already lies inside it);
+    ///  - several overlaps: the whole merged range.
+    ///
+    /// @param block      Block that backs the range.
+    /// @param gpu_addr   GPU virtual address corresponding to cache_addr.
+    /// @param cache_addr First cache address of the requested range.
+    /// @param size       Size of the requested range in bytes.
+    /// @param is_written When true, no host data is uploaded.
+    void MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const CacheAddr cache_addr,
+                    const std::size_t size, bool is_written) {
+        const CacheAddr cache_addr_end = cache_addr + size;
+
+        // Uploads host memory [range_start, range_end) to the matching offset of the block.
+        const auto upload_range = [&](const CacheAddr range_start, const CacheAddr range_end) {
+            if (is_written) {
+                return;
+            }
+            const std::size_t range_size = static_cast<std::size_t>(range_end - range_start);
+            UploadBlockData(block, block->GetOffset(range_start), range_size,
+                            FromCacheAddr(range_start));
+        };
+
+        const std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+        if (overlaps.empty()) {
+            upload_range(cache_addr, cache_addr_end);
+            Register(MapInterval{cache_addr, cache_addr_end}, gpu_addr);
+            return;
+        }
+
+        if (overlaps.size() == 1) {
+            MapInterval current_map = overlaps.front();
+            if (current_map.IsInside(cache_addr, cache_addr_end)) {
+                return;
+            }
+            const CacheAddr new_start = std::min(cache_addr, current_map.start);
+            const CacheAddr new_end = std::max(cache_addr_end, current_map.end);
+            const GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+
+            // Upload whatever the existing interval does not cover. Both sides are checked
+            // independently: the request can stick out in front of the interval, behind it, or
+            // on both sides.
+            if (new_start < current_map.start) {
+                upload_range(new_start, current_map.start);
+            }
+            if (current_map.end < new_end) {
+                upload_range(current_map.end, new_end);
+            }
+
+            Unregister(current_map);
+            Register(MapInterval{new_start, new_end}, new_gpu_addr);
+            return;
+        }
+
+        // Several overlaps: compute the union of the request and every overlapped interval.
+        CacheAddr start = cache_addr;
+        CacheAddr end = cache_addr_end;
+        for (const MapInterval& overlap : overlaps) {
+            start = std::min(overlap.start, start);
+            end = std::max(overlap.end, end);
+        }
+        const GPUVAddr new_gpu_addr = gpu_addr + start - cache_addr;
+
+        for (const MapInterval& overlap : overlaps) {
+            Unregister(overlap);
+        }
+        upload_range(start, end);
+        Register(MapInterval{start, end}, new_gpu_addr);
+    }
+
+    /// Returns the registered intervals that overlap [addr, addr + size).
+    /// An empty vector is returned for a zero-sized range.
+    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+        std::vector<MapInterval> found{};
+        if (size == 0) {
+            return found;
+        }
+
+        const IntervalType query{addr, addr + size};
+        const auto [first, last] = mapped_addresses.equal_range(query);
+        for (auto it = first; it != last; ++it) {
+            found.emplace_back(it->lower(), it->upper());
+        }
+        return found;
+    }
+
+    /// Increments the modification tick counter and returns the new value.
+    u64 GetModifiedTicks() {
+        modified_ticks += 1;
+        return modified_ticks;
+    }
+
+    /// Aligns the stream cursor, copies `size` bytes from raw_pointer to it and advances the
+    /// cursor past the copied data. Returns the stream buffer handle and the offset at which the
+    /// data starts.
+    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
+                                  std::size_t alignment) {
+        AlignBuffer(alignment);
+
+        // Capture the destination offset before the cursor moves.
+        const BufferInfo uploaded{&stream_buffer_handle, buffer_offset};
+        std::memcpy(buffer_ptr, raw_pointer, size);
+
+        buffer_offset += size;
+        buffer_ptr += size;
+        return uploaded;
+    }
+
+    /// Rounds the stream offset up to `alignment` and moves the mapped pointer forward by the
+    /// same number of bytes. The offset is what gets aligned, not the pointer value.
+    void AlignBuffer(std::size_t alignment) {
+        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
+        const auto padding = offset_aligned - buffer_offset;
+        buffer_ptr += padding;
+        buffer_offset = offset_aligned;
+    }
+
+    /// Replaces `buffer` with a new block that starts at the same address and is one block page
+    /// larger. The old contents are copied over, the old block is queued for destruction and
+    /// every page of the new range is pointed at the new block.
+    TBuffer EnlargeBlock(TBuffer buffer) {
+        const CacheAddr base_addr = buffer->GetCacheAddr();
+        const std::size_t previous_size = buffer->GetSize();
+        const std::size_t grown_size = previous_size + block_page_size;
+
+        TBuffer grown = CreateBlock(base_addr, grown_size);
+        CopyBlock(buffer, grown, 0, 0, previous_size);
+
+        // Retire the old block; TickFrame releases it later.
+        buffer->SetEpoch(epoch);
+        pending_destruction.push_back(buffer);
+
+        const CacheAddr last_addr = base_addr + grown_size - 1;
+        const u64 last_page = last_addr >> block_page_bits;
+        for (u64 page = base_addr >> block_page_bits; page <= last_page; ++page) {
+            blocks[page] = grown;
+        }
+        return grown;
+    }
+
+    /// Replaces two blocks with a single new block. The new block starts at the lower of the
+    /// two start addresses and its size is the sum of both sizes; each old block is copied to
+    /// the offset matching its start address, queued for destruction, and every page of the
+    /// new range is pointed at the new block.
+    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
+        const CacheAddr first_addr = first->GetCacheAddr();
+        const CacheAddr second_addr = second->GetCacheAddr();
+        const std::size_t first_size = first->GetSize();
+        const std::size_t second_size = second->GetSize();
+
+        const CacheAddr merged_addr = std::min(first_addr, second_addr);
+        const std::size_t merged_size = first_size + second_size;
+        TBuffer merged = CreateBlock(merged_addr, merged_size);
+        CopyBlock(first, merged, 0, merged->GetOffset(first_addr), first_size);
+        CopyBlock(second, merged, 0, merged->GetOffset(second_addr), second_size);
+
+        // Retire both source blocks; TickFrame releases them later.
+        first->SetEpoch(epoch);
+        second->SetEpoch(epoch);
+        pending_destruction.push_back(first);
+        pending_destruction.push_back(second);
+
+        const CacheAddr last_addr = merged_addr + merged_size - 1;
+        const u64 last_page = last_addr >> block_page_bits;
+        for (u64 page = merged_addr >> block_page_bits; page <= last_page; ++page) {
+            blocks[page] = merged;
+        }
+        return merged;
+    }
+
+    /// Returns a block covering every block page touched by [cache_addr, cache_addr + size).
+    ///
+    /// Pages that have no block yet get one: a fresh single-page block when nothing has been
+    /// found so far, otherwise the current block enlarged by one page. When a page belongs to a
+    /// different block than the one found so far, the two blocks are merged.
+    ///
+    /// A zero-sized request is treated as touching the page that contains cache_addr, so the
+    /// page loop always runs at least once.
+    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+        // For size == 0 on a page-aligned address `cache_addr + size - 1` would land on the
+        // previous page, the loop would be skipped and an empty TBuffer would be returned.
+        const CacheAddr last_addr = size == 0 ? cache_addr : cache_addr + size - 1;
+        const u64 first_page = cache_addr >> block_page_bits;
+        const u64 last_page = last_addr >> block_page_bits;
+
+        TBuffer found{};
+        for (u64 page = first_page; page <= last_page; ++page) {
+            const auto it = blocks.find(page);
+            if (it == blocks.end()) {
+                if (found) {
+                    found = EnlargeBlock(found);
+                } else {
+                    const CacheAddr start_addr = (page << block_page_bits);
+                    found = CreateBlock(start_addr, block_page_size);
+                    blocks[page] = found;
+                }
+                continue;
+            }
+            if (!found) {
+                found = it->second;
+                continue;
+            }
+            if (found == it->second) {
+                // Page already belongs to the block we are carrying along.
+                continue;
+            }
+            found = MergeBlocks(found, it->second);
+        }
+        return found;
+    }
+
+    /// Stream buffer used for host uploads, and the handle it reported at construction.
+    std::unique_ptr<StreamBuffer> stream_buffer;
+    TBufferType stream_buffer_handle{};
+
+    /// Third value returned by the last stream_buffer->Map(); consumed and cleared by Unmap().
+    bool invalidated = false;
+
+    /// Write cursor into the mapped stream buffer: pointer, current offset and the offset at
+    /// which the current mapping started.
+    u8* buffer_ptr = nullptr;
+    u64 buffer_offset = 0;
+    u64 buffer_offset_base = 0;
+
+    using IntervalCache = boost::icl::interval_set<CacheAddr>;
+    using IntervalType = typename IntervalCache::interval_type;
+    /// Registered intervals, and the GPU/CPU addresses stored for each of them.
+    IntervalCache mapped_addresses{};
+    std::unordered_map<MapInterval, MapInfo> map_storage;
+
+    /// Blocks are tracked per page of block_page_size bytes, keyed by page index
+    /// (cache address >> block_page_bits). The shift is done in u64 so that the constant stays
+    /// well defined if block_page_bits is ever raised to 31 or more.
+    static constexpr u64 block_page_bits{24};
+    static constexpr u64 block_page_size{u64{1} << block_page_bits};
+    std::unordered_map<u64, TBuffer> blocks;
+
+    /// Blocks replaced by EnlargeBlock/MergeBlocks, kept alive until TickFrame drops them.
+    std::list<TBuffer> pending_destruction;
+    u64 epoch{};
+    u64 modified_ticks{};
+    VideoCore::RasterizerInterface& rasterizer;
+    Core::System& system;
+    std::recursive_mutex mutex;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
new file mode 100644
index 000000000..652a35dcd
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -0,0 +1,48 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <boost/functional/hash.hpp>
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+namespace VideoCommon {
+
+/// A range of cache addresses delimited by `start` and `end`; its size is end - start.
+struct MapInterval {
+    MapInterval(const CacheAddr start, const CacheAddr end) : start{start}, end{end} {}
+    CacheAddr start;
+    CacheAddr end;
+
+    /// Returns true when [other_start, other_end) lies entirely within this interval.
+    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+        return start <= other_start && other_end <= end;
+    }
+
+    /// Two intervals are equal when both bounds match.
+    bool operator==(const MapInterval& rhs) const {
+        return start == rhs.start && end == rhs.end;
+    }
+
+    bool operator!=(const MapInterval& rhs) const {
+        return !operator==(rhs);
+    }
+};
+
+struct MapInfo {
+    GPUVAddr gpu_addr; // GPU virtual address the interval was registered with
+    VAddr cpu_addr; // CPU address resolved from gpu_addr, used for cached-page accounting
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+/// Hash support so MapInterval can be used as an unordered container key: the hash of `start`
+/// is combined with the hash of `end`.
+template <>
+struct hash<VideoCommon::MapInterval> {
+    std::size_t operator()(const VideoCommon::MapInterval& k) const noexcept {
+        const std::hash<CacheAddr> address_hash{};
+        std::size_t seed = address_hash(k.start);
+        boost::hash_combine(seed, address_hash(k.end));
+        return seed;
+    }
+};
+
+} // namespace std
author	Fernando Sahmkow <fsahmkow27@gmail.com>	2019-07-19 16:50:40 +0200
committer	FernandoS27 <fsahmkow27@gmail.com>	2019-08-21 18:14:22 +0200
commit	862bec001b7ada13ba0e97f95d6ad108ae8a8d0c (patch)
tree	d366f7768d0acd4cbe1514a57b9fddcfb4c79eaa /src/video_core/buffer_cache
parent	Merge pull request #2748 from FernandoS27/align-memory (diff)
download	yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.gz yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.bz2 yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.lz yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.xz yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.tar.zst yuzu-862bec001b7ada13ba0e97f95d6ad108ae8a8d0c.zip