Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_block.h      62
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.cpp     13
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h     1598
-rw-r--r--  src/video_core/buffer_cache/map_interval.cpp     33
-rw-r--r--  src/video_core/buffer_cache/map_interval.h       93
5 files changed, 1132 insertions, 667 deletions
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e9306194a..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-class BufferBlock {
-public:
- [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
- return (cpu_addr < end) && (cpu_addr_end > start);
- }
-
- [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
- return cpu_addr <= other_start && other_end <= cpu_addr_end;
- }
-
- [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
- return static_cast<std::size_t>(in_addr - cpu_addr);
- }
-
- [[nodiscard]] VAddr CpuAddr() const {
- return cpu_addr;
- }
-
- [[nodiscard]] VAddr CpuAddrEnd() const {
- return cpu_addr_end;
- }
-
- void SetCpuAddr(VAddr new_addr) {
- cpu_addr = new_addr;
- cpu_addr_end = new_addr + size;
- }
-
- [[nodiscard]] std::size_t Size() const {
- return size;
- }
-
- [[nodiscard]] u64 Epoch() const {
- return epoch;
- }
-
- void SetEpoch(u64 new_epoch) {
- epoch = new_epoch;
- }
-
-protected:
- explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
- SetCpuAddr(cpu_addr_);
- }
-
-private:
- VAddr cpu_addr{};
- VAddr cpu_addr_end{};
- std::size_t size{};
- u64 epoch{};
-};
-
-} // namespace VideoCommon
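
The deleted BufferBlock grouped mappings into 2 MiB blocks that were enlarged and merged on demand; the rewritten cache in buffer_cache.h below instead registers whole buffers in a flat page table indexed by cpu_addr >> PAGE_BITS (64 KiB cache pages). A compilable sketch of that indexing idea, using simplified stand-in types rather than the real SlotVector<Buffer>/BufferId machinery:

    #include <array>
    #include <cstdint>

    using VAddr = std::uint64_t;
    using BufferId = std::uint32_t; // stand-in for the SlotId-based BufferId; 0 is the null id

    constexpr std::uint32_t PAGE_BITS = 16; // 64 KiB cache pages, as in the new header
    constexpr std::size_t NUM_PAGES = (std::size_t{1} << 39) >> PAGE_BITS;

    // One entry per page of the 39-bit address space; every page covered by a
    // registered buffer stores that buffer's id.
    std::array<BufferId, NUM_PAGES> page_table{};

    BufferId LookupPage(VAddr cpu_addr) {
        // A lookup is one shift and one array read instead of a block-map walk.
        return page_table[cpu_addr >> PAGE_BITS];
    }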
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+
+namespace VideoCommon {
+
+MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
+
+} // namespace VideoCommon
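
These definitions pair with the MICROPROFILE_DECLARE lines added to buffer_cache.h below: since BufferCache<P> is a header-only class template, the timers are defined once in this translation unit and only declared where they are used with MICROPROFILE_SCOPE. A minimal sketch of that pattern, using a hypothetical GPU_Example timer rather than the real ones:

    // example.cpp -- hypothetical translation unit that owns the definition
    #include "common/microprofile.h"
    MICROPROFILE_DEFINE(GPU_Example, "GPU", "Example pass", MP_RGB(224, 128, 128));

    // example.h -- hypothetical header included by the templated code
    MICROPROFILE_DECLARE(GPU_Example);

    template <class P>
    void ExamplePass(P& backend) {
        MICROPROFILE_SCOPE(GPU_Example); // times this scope under the "GPU" group
        backend.DoWork();
    }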
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83b9ee871..e4f3c8e35 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,591 +4,1231 @@
#pragma once
-#include <list>
+#include <algorithm>
+#include <array>
+#include <deque>
#include <memory>
#include <mutex>
+#include <span>
#include <unordered_map>
-#include <unordered_set>
-#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/intrusive/set.hpp>
-#include "common/alignment.h"
-#include "common/assert.h"
#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "core/core.h"
+#include "common/div_ceil.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "core/memory.h"
#include "core/settings.h"
-#include "video_core/buffer_cache/buffer_block.h"
-#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/delayed_destruction_ring.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
namespace VideoCommon {
-template <typename Buffer, typename BufferType, typename StreamBuffer>
+MICROPROFILE_DECLARE(GPU_PrepareBuffers);
+MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
+MICROPROFILE_DECLARE(GPU_DownloadMemory);
+
+using BufferId = SlotId;
+
+constexpr u32 NUM_VERTEX_BUFFERS = 32;
+constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
+constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
+constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
+constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_STAGES = 5;
+
+template <typename P>
class BufferCache {
- using IntervalSet = boost::icl::interval_set<VAddr>;
- using IntervalType = typename IntervalSet::interval_type;
- using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+ // Page size for caching purposes.
+ // This is unrelated to the CPU page size and it can be changed as it seems optimal.
+ static constexpr u32 PAGE_BITS = 16;
+ static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
- static constexpr u64 WRITE_PAGE_BIT = 11;
- static constexpr u64 BLOCK_PAGE_BITS = 21;
- static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+ static constexpr bool IS_OPENGL = P::IS_OPENGL;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
+ P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
+ P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
+ static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
-public:
- struct BufferInfo {
- BufferType handle;
- u64 offset;
- u64 address;
+ static constexpr BufferId NULL_BUFFER_ID{0};
+
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+ using Runtime = typename P::Runtime;
+ using Buffer = typename P::Buffer;
+
+ struct Empty {};
+
+ struct Binding {
+ VAddr cpu_addr{};
+ u32 size{};
+ BufferId buffer_id;
};
- BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool is_written = false, bool use_fast_cbuf = false) {
- std::lock_guard lock{mutex};
+ static constexpr Binding NULL_BINDING{
+ .cpu_addr = 0,
+ .size = 0,
+ .buffer_id = NULL_BUFFER_ID,
+ };
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- return GetEmptyBuffer(size);
- }
+public:
+ static constexpr size_t SKIP_CACHE_SIZE = 4096;
- // Cache management is a big overhead, so only cache entries with a given size.
- // TODO: Figure out which size is the best for given games.
- constexpr std::size_t max_stream_size = 0x800;
- if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
- const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
- if (use_fast_cbuf) {
- u8* dest;
- if (is_granular) {
- dest = gpu_memory.GetPointer(gpu_addr);
- } else {
- staging_buffer.resize(size);
- dest = staging_buffer.data();
- gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
- }
- return ConstBufferUpload(dest, size);
- }
- if (is_granular) {
- u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
- std::memcpy(dest, host_ptr, size);
- });
- } else {
- return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
- gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
- });
- }
- }
- }
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ Runtime& runtime_);
- Buffer* const block = GetBlock(*cpu_addr, size);
- MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
- if (!map) {
- return GetEmptyBuffer(size);
- }
- if (is_written) {
- map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() &&
- Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- MarkForAsyncFlush(map);
- }
- if (!map->is_written) {
- map->is_written = true;
- MarkRegionAsWritten(map->start, map->end - 1);
- }
- }
+ void TickFrame();
- return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
- }
+ void WriteMemory(VAddr cpu_addr, u64 size);
- /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset.
- BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
- std::size_t alignment = 4) {
- std::lock_guard lock{mutex};
- return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
- std::memcpy(dest, raw_pointer, size);
- });
- }
+ void CachedWriteMemory(VAddr cpu_addr, u64 size);
- /// Prepares the buffer cache for data uploading
- /// @param max_size Maximum number of bytes that will be uploaded
- /// @return True when a stream buffer invalidation was required, false otherwise
- void Map(std::size_t max_size) {
- std::lock_guard lock{mutex};
+ void DownloadMemory(VAddr cpu_addr, u64 size);
- std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
- buffer_offset = buffer_offset_base;
- }
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
- /// Finishes the upload stream
- void Unmap() {
- std::lock_guard lock{mutex};
- stream_buffer.Unmap(buffer_offset - buffer_offset_base);
- }
+ void UpdateGraphicsBuffers(bool is_indexed);
- /// Function called at the end of each frame, intended for deferred operations
- void TickFrame() {
- ++epoch;
+ void UpdateComputeBuffers();
- while (!pending_destruction.empty()) {
- // Delay at least 4 frames before destruction.
- // This is due to triple buffering happening on some drivers.
- static constexpr u64 epochs_to_destroy = 5;
- if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
- break;
- }
- pending_destruction.pop();
- }
- }
+ void BindHostGeometryBuffers(bool is_indexed);
- /// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void BindHostStageBuffers(size_t stage);
- VectorMapInterval objects = GetMapsInRange(addr, size);
- std::sort(objects.begin(), objects.end(),
- [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
- for (MapInterval* object : objects) {
- if (object->is_modified && object->is_registered) {
- mutex.unlock();
- FlushMap(object);
- mutex.lock();
- }
- }
- }
+ void BindHostComputeBuffers();
- bool MustFlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void SetEnabledUniformBuffers(size_t stage, u32 enabled);
- const VectorMapInterval objects = GetMapsInRange(addr, size);
- return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
- return map->is_modified && map->is_registered;
- });
- }
+ void SetEnabledComputeUniformBuffers(u32 enabled);
- /// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
- std::lock_guard lock{mutex};
+ void UnbindGraphicsStorageBuffers(size_t stage);
- for (auto& object : GetMapsInRange(addr, size)) {
- if (object->is_registered) {
- Unregister(object);
- }
- }
- }
+ void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written);
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void UnbindComputeStorageBuffers();
- for (MapInterval* object : GetMapsInRange(addr, size)) {
- if (object->is_memory_marked && object->is_registered) {
- UnmarkMemory(object);
- object->is_sync_pending = true;
- marked_for_unregister.emplace_back(object);
- }
- }
- }
+ void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written);
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
+ void FlushCachedWrites();
- for (auto& object : marked_for_unregister) {
- if (object->is_registered) {
- object->is_sync_pending = false;
- Unregister(object);
- }
+ /// Return true when there are uncommitted buffers to be downloaded
+ [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
+
+ /// Return true when the caller should wait for async downloads
+ [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
+
+ /// Commit asynchronous downloads
+ void CommitAsyncFlushes();
+
+ /// Pop asynchronous downloads
+ void PopAsyncFlushes();
+
+ /// Return true when a CPU region is modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
+ std::mutex mutex;
+
+private:
+ template <typename Func>
+ static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
+ for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
+ const int disabled_bits = std::countr_zero(enabled_mask);
+ index += disabled_bits;
+ enabled_mask >>= disabled_bits;
+ func(index);
}
- marked_for_unregister.clear();
}
- void CommitAsyncFlushes() {
- if (uncommitted_flushes) {
- auto commit_list = std::make_shared<std::list<MapInterval*>>();
- for (MapInterval* map : *uncommitted_flushes) {
- if (map->is_registered && map->is_modified) {
- // TODO(Blinkhawk): Implement backend asynchronous flushing
- // AsyncFlushMap(map)
- commit_list->push_back(map);
- }
- }
- if (!commit_list->empty()) {
- committed_flushes.push_back(commit_list);
- } else {
- committed_flushes.emplace_back();
+ template <typename Func>
+ void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
+ const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
+ for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ ++page;
+ continue;
}
- } else {
- committed_flushes.emplace_back();
+ Buffer& buffer = slot_buffers[buffer_id];
+ func(buffer_id, buffer);
+
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
}
- uncommitted_flushes.reset();
}
- bool ShouldWaitAsyncFlushes() const {
- return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+ static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
+ return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
+ ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
}
- bool HasUncommittedFlushes() const {
- return uncommitted_flushes != nullptr;
- }
+ void BindHostIndexBuffer();
- void PopAsyncFlushes() {
- if (committed_flushes.empty()) {
- return;
- }
- auto& flush_list = committed_flushes.front();
- if (!flush_list) {
- committed_flushes.pop_front();
- return;
- }
- for (MapInterval* map : *flush_list) {
- if (map->is_registered) {
- // TODO(Blinkhawk): Replace this for reading the asynchronous flush
- FlushMap(map);
- }
- }
- committed_flushes.pop_front();
- }
+ void BindHostVertexBuffers();
- virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
+ void BindHostGraphicsUniformBuffers(size_t stage);
-protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- StreamBuffer& stream_buffer_)
- : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
- stream_buffer{stream_buffer_} {}
+ void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
- ~BufferCache() = default;
+ void BindHostGraphicsStorageBuffers(size_t stage);
- virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+ void BindHostTransformFeedbackBuffers();
- virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
- return {};
- }
+ void BindHostComputeUniformBuffers();
- /// Register an object into the cache
- MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
- const VAddr cpu_addr = new_map.start;
- if (!cpu_addr) {
- LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
- new_map.gpu_addr);
- return nullptr;
- }
- const std::size_t size = new_map.end - new_map.start;
- new_map.is_registered = true;
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
- new_map.is_memory_marked = true;
- if (inherit_written) {
- MarkRegionAsWritten(new_map.start, new_map.end - 1);
- new_map.is_written = true;
- }
- MapInterval* const storage = mapped_addresses_allocator.Allocate();
- *storage = new_map;
- mapped_addresses.insert(*storage);
- return storage;
- }
+ void BindHostComputeStorageBuffers();
- void UnmarkMemory(MapInterval* map) {
- if (!map->is_memory_marked) {
- return;
- }
- const std::size_t size = map->end - map->start;
- rasterizer.UpdatePagesCachedCount(map->start, size, -1);
- map->is_memory_marked = false;
- }
-
- /// Unregisters an object from the cache
- void Unregister(MapInterval* map) {
- UnmarkMemory(map);
- map->is_registered = false;
- if (map->is_sync_pending) {
- map->is_sync_pending = false;
- marked_for_unregister.remove(map);
+ void DoUpdateGraphicsBuffers(bool is_indexed);
+
+ void DoUpdateComputeBuffers();
+
+ void UpdateIndexBuffer();
+
+ void UpdateVertexBuffers();
+
+ void UpdateVertexBuffer(u32 index);
+
+ void UpdateUniformBuffers(size_t stage);
+
+ void UpdateStorageBuffers(size_t stage);
+
+ void UpdateTransformFeedbackBuffers();
+
+ void UpdateTransformFeedbackBuffer(u32 index);
+
+ void UpdateComputeUniformBuffers();
+
+ void UpdateComputeStorageBuffers();
+
+ void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
+
+ [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
+
+ [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
+
+ void Register(BufferId buffer_id);
+
+ void Unregister(BufferId buffer_id);
+
+ template <bool insert>
+ void ChangeRegister(BufferId buffer_id);
+
+ void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+ void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+ void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+ std::span<BufferCopy> copies);
+
+ void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+ std::span<const BufferCopy> copies);
+
+ void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
+ std::span<const BufferCopy> copies);
+
+ void DeleteBuffer(BufferId buffer_id);
+
+ void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
+
+ void NotifyBufferDeletion();
+
+ [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+
+ [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
+
+ [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
+
+ [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
+
+ VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
+ Runtime& runtime;
+
+ SlotVector<Buffer> slot_buffers;
+ DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
+
+ u32 last_index_count = 0;
+
+ Binding index_buffer;
+ std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
+ std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
+ std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+ std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
+
+ std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
+ std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+
+ std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
+ u32 enabled_compute_uniform_buffers = 0;
+
+ std::array<u32, NUM_STAGES> enabled_storage_buffers{};
+ std::array<u32, NUM_STAGES> written_storage_buffers{};
+ u32 enabled_compute_storage_buffers = 0;
+ u32 written_compute_storage_buffers = 0;
+
+ std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+
+ bool has_deleted_buffers = false;
+
+ std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
+ dirty_uniform_buffers{};
+
+ std::vector<BufferId> cached_write_buffer_ids;
+
+ // TODO: This data structure is not optimal and it should be reworked
+ std::vector<BufferId> uncommitted_downloads;
+ std::deque<std::vector<BufferId>> committed_downloads;
+
+ size_t immediate_buffer_capacity = 0;
+ std::unique_ptr<u8[]> immediate_buffer_alloc;
+
+ std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
+};
+
+template <class P>
+BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ Runtime& runtime_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+ gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+ // Ensure the first slot is used for the null buffer
+ void(slot_buffers.insert(runtime, NullBufferParams{}));
+}
+
+template <class P>
+void BufferCache<P>::TickFrame() {
+ delayed_destruction_ring.Tick();
+}
+
+template <class P>
+void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ buffer.MarkRegionAsCpuModified(cpu_addr, size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+ if (!buffer.HasCachedWrites()) {
+ cached_write_buffer_ids.push_back(buffer_id);
}
- if (map->is_written) {
- UnmarkRegionAsWritten(map->start, map->end - 1);
+ buffer.CachedCpuWrite(cpu_addr, size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ boost::container::small_vector<BufferCopy, 1> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
}
- const auto it = mapped_addresses.find(*map);
- ASSERT(it != mapped_addresses.end());
- mapped_addresses.erase(it);
- mapped_addresses_allocator.Release(map);
- }
-
-private:
- MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
- const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
- if (overlaps.empty()) {
- const VAddr cpu_addr_end = cpu_addr + size;
- if (gpu_memory.IsGranularRange(gpu_addr, size)) {
- u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- block->Upload(block->Offset(cpu_addr), size, host_ptr);
- } else {
- staging_buffer.resize(size);
- gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ const u8* const mapped_memory = download_staging.mapped_span.data();
+ const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.Finish();
+ for (const BufferCopy& copy : copies) {
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ const u8* copy_mapped_memory = mapped_memory + copy.dst_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
}
- return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
- }
-
- const VAddr cpu_addr_end = cpu_addr + size;
- if (overlaps.size() == 1) {
- MapInterval* const current_map = overlaps[0];
- if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
- return current_map;
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const BufferCopy& copy : copies) {
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
}
}
- VAddr new_start = cpu_addr;
- VAddr new_end = cpu_addr_end;
- bool write_inheritance = false;
- bool modified_inheritance = false;
- // Calculate new buffer parameters
- for (MapInterval* overlap : overlaps) {
- new_start = std::min(overlap->start, new_start);
- new_end = std::max(overlap->end, new_end);
- write_inheritance |= overlap->is_written;
- modified_inheritance |= overlap->is_modified;
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ uniform_buffers[stage][index] = NULL_BINDING;
+ return;
+ }
+ const Binding binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = BufferId{},
+ };
+ uniform_buffers[stage][index] = binding;
+}
+
+template <class P>
+void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
+ MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+ do {
+ has_deleted_buffers = false;
+ DoUpdateGraphicsBuffers(is_indexed);
+ } while (has_deleted_buffers);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeBuffers() {
+ MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+ do {
+ has_deleted_buffers = false;
+ DoUpdateComputeBuffers();
+ } while (has_deleted_buffers);
+}
+
+template <class P>
+void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ if (is_indexed) {
+ BindHostIndexBuffer();
+ } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+ runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
- GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
- for (auto& overlap : overlaps) {
- Unregister(overlap);
+ }
+ BindHostVertexBuffers();
+ BindHostTransformFeedbackBuffers();
+}
+
+template <class P>
+void BufferCache<P>::BindHostStageBuffers(size_t stage) {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ BindHostGraphicsUniformBuffers(stage);
+ BindHostGraphicsStorageBuffers(stage);
+}
+
+template <class P>
+void BufferCache<P>::BindHostComputeBuffers() {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ BindHostComputeUniformBuffers();
+ BindHostComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ if (enabled_uniform_buffers[stage] != enabled) {
+ dirty_uniform_buffers[stage] = ~u32{0};
}
- UpdateBlock(block, new_start, new_end, overlaps);
-
- const MapInterval new_map{new_start, new_end, new_gpu_addr};
- MapInterval* const map = Register(new_map, write_inheritance);
- if (!map) {
- return nullptr;
+ }
+ enabled_uniform_buffers[stage] = enabled;
+}
+
+template <class P>
+void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
+ enabled_compute_uniform_buffers = enabled;
+}
+
+template <class P>
+void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
+ enabled_storage_buffers[stage] = 0;
+ written_storage_buffers[stage] = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
+ u32 cbuf_offset, bool is_written) {
+ enabled_storage_buffers[stage] |= 1U << ssbo_index;
+ written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
+
+ const auto& cbufs = maxwell3d.state.shader_stages[stage];
+ const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
+ storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P>
+void BufferCache<P>::UnbindComputeStorageBuffers() {
+ enabled_compute_storage_buffers = 0;
+ written_compute_storage_buffers = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written) {
+ enabled_compute_storage_buffers |= 1U << ssbo_index;
+ written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
+
+ const auto& launch_desc = kepler_compute.launch_description;
+ ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
+
+ const auto& cbufs = launch_desc.const_buffer_config;
+ const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
+ compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P>
+void BufferCache<P>::FlushCachedWrites() {
+ for (const BufferId buffer_id : cached_write_buffer_ids) {
+ slot_buffers[buffer_id].FlushCachedWrites();
+ }
+ cached_write_buffer_ids.clear();
+}
+
+template <class P>
+bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
+ return !uncommitted_downloads.empty();
+}
+
+template <class P>
+bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
+
+template <class P>
+void BufferCache<P>::CommitAsyncFlushes() {
+ // This is intentionally passing the value by copy
+ committed_downloads.push_front(uncommitted_downloads);
+ uncommitted_downloads.clear();
+}
+
+template <class P>
+void BufferCache<P>::PopAsyncFlushes() {
+ if (committed_downloads.empty()) {
+ return;
+ }
+ auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
+ const std::span<const BufferId> download_ids = committed_downloads.back();
+ if (download_ids.empty()) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ for (const BufferId buffer_id : download_ids) {
+ slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
+ downloads.push_back({
+ BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ },
+ buffer_id,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ }
+ if (downloads.empty()) {
+ return;
+ }
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ for (const auto [copy, buffer_id] : downloads) {
+ const std::array copies{copy};
+ runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
}
- if (modified_inheritance) {
- map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() &&
- Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- MarkForAsyncFlush(map);
- }
+ runtime.Finish();
+ for (const auto [copy, buffer_id] : downloads) {
+ const Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset;
+ cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const auto [copy, buffer_id] : downloads) {
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
}
- return map;
}
-
- void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
- const IntervalType base_interval{start, end};
- IntervalSet interval_set{};
- interval_set.add(base_interval);
- for (auto& overlap : overlaps) {
- const IntervalType subtract{overlap->start, overlap->end};
- interval_set.subtract(subtract);
+}
+
+template <class P>
+bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+ const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId image_id = page_table[page];
+ if (!image_id) {
+ ++page;
+ continue;
}
- for (auto& interval : interval_set) {
- const std::size_t size = interval.upper() - interval.lower();
- if (size == 0) {
- continue;
- }
- staging_buffer.resize(size);
- cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
- block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
+ Buffer& buffer = slot_buffers[image_id];
+ if (buffer.IsRegionGpuModified(addr, size)) {
+ return true;
}
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
}
-
- VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
- VectorMapInterval result;
- if (size == 0) {
- return result;
+ return false;
+}
+
+template <class P>
+void BufferCache<P>::BindHostIndexBuffer() {
+ Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+ const u32 offset = buffer.Offset(index_buffer.cpu_addr);
+ const u32 size = index_buffer.size;
+ SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
+ if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+ runtime.BindIndexBuffer(buffer, offset, size);
+ } else {
+ runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
+ maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
+ buffer, offset, size);
+ }
+}
+
+template <class P>
+void BufferCache<P>::BindHostVertexBuffers() {
+ auto& flags = maxwell3d.dirty.flags;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ const Binding& binding = vertex_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
+ if (!flags[Dirty::VertexBuffer0 + index]) {
+ continue;
}
+ flags[Dirty::VertexBuffer0 + index] = false;
+
+ const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
+ }
+}
- const VAddr addr_end = addr + size;
- auto it = mapped_addresses.lower_bound(addr);
- if (it != mapped_addresses.begin()) {
- --it;
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
+ u32 dirty = ~0U;
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty = std::exchange(dirty_uniform_buffers[stage], 0);
+ }
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ const bool needs_bind = ((dirty >> index) & 1) != 0;
+ BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ ++binding_index;
}
- while (it != mapped_addresses.end() && it->start < addr_end) {
- if (it->Overlaps(addr, addr_end)) {
- result.push_back(&*it);
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
+ bool needs_bind) {
+ const Binding& binding = uniform_buffers[stage][index];
+ const VAddr cpu_addr = binding.cpu_addr;
+ const u32 size = binding.size;
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ if constexpr (IS_OPENGL) {
+ if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+ if (runtime.HasFastBufferSubData()) {
+ // Fast path for Nvidia
+ if (!HasFastUniformBufferBound(stage, binding_index)) {
+ // We only have to bind when the currently bound buffer is not the fast version
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+ runtime.BindFastUniformBuffer(stage, binding_index, size);
+ }
+ const auto span = ImmediateBufferWithData(cpu_addr, size);
+ runtime.PushFastUniformBuffer(stage, binding_index, span);
+ } else {
+ // Stream buffer path to avoid stalling on non-Nvidia drivers
+ const auto span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
}
- ++it;
+ return;
}
- return result;
}
-
- /// Returns a ticks counter used for tracking when cached objects were last modified
- u64 GetModifiedTicks() {
- return ++modified_ticks;
+ // Classic cached path
+ SynchronizeBuffer(buffer, cpu_addr, size);
+ if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
+ // Skip binding if it's not needed and if the bound buffer is not the fast version
+ // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ return;
}
+ fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
- void FlushMap(MapInterval* map) {
- const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
- ASSERT_OR_EXECUTE(it != blocks.end(), return;);
-
- std::shared_ptr<Buffer> block = it->second;
-
- const std::size_t size = map->end - map->start;
- staging_buffer.resize(size);
- block->Download(block->Offset(map->start), size, staging_buffer.data());
- cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
- map->MarkAsModified(false, 0);
+ const u32 offset = buffer.Offset(cpu_addr);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
+ } else {
+ runtime.BindUniformBuffer(buffer, offset, size);
}
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+ const Binding& binding = storage_buffers[stage][index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
+ if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+ runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
+ ++binding_index;
+ } else {
+ runtime.BindStorageBuffer(buffer, offset, size, is_written);
+ }
+ });
+}
- template <typename Callable>
- BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
- AlignBuffer(alignment);
- const std::size_t uploaded_offset = buffer_offset;
- callable(buffer_ptr);
-
- buffer_ptr += size;
- buffer_offset += size;
- return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
+template <class P>
+void BufferCache<P>::BindHostTransformFeedbackBuffers() {
+ if (maxwell3d.regs.tfb_enabled == 0) {
+ return;
}
-
- void AlignBuffer(std::size_t alignment) {
- // Align the offset, not the mapped pointer
- const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
- buffer_ptr += offset_aligned - buffer_offset;
- buffer_offset = offset_aligned;
+ for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+ const Binding& binding = transform_feedback_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
}
+}
- std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
- const std::size_t old_size = buffer->Size();
- const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
- const VAddr cpu_addr = buffer->CpuAddr();
- std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
- new_buffer->CopyFrom(*buffer, 0, 0, old_size);
- QueueDestruction(std::move(buffer));
-
- const VAddr cpu_addr_end = cpu_addr + new_size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- blocks.insert_or_assign(page_start, new_buffer);
+template <class P>
+void BufferCache<P>::BindHostComputeUniformBuffers() {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ // Mark all uniform buffers as dirty
+ dirty_uniform_buffers.fill(~u32{0});
+ }
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ const Binding& binding = compute_uniform_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
+ ++binding_index;
+ } else {
+ runtime.BindUniformBuffer(buffer, offset, size);
}
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindHostComputeStorageBuffers() {
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+ const Binding& binding = compute_storage_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
+ if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+ runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
+ ++binding_index;
+ } else {
+ runtime.BindStorageBuffer(buffer, offset, size, is_written);
+ }
+ });
+}
- return new_buffer;
+template <class P>
+void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
+ if (is_indexed) {
+ UpdateIndexBuffer();
}
+ UpdateVertexBuffers();
+ UpdateTransformFeedbackBuffers();
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ UpdateUniformBuffers(stage);
+ UpdateStorageBuffers(stage);
+ }
+}
+
+template <class P>
+void BufferCache<P>::DoUpdateComputeBuffers() {
+ UpdateComputeUniformBuffers();
+ UpdateComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::UpdateIndexBuffer() {
+ // We have to check for the dirty flags and index count
+ // The index count is currently changed without updating the dirty flags
+ const auto& index_array = maxwell3d.regs.index_array;
+ auto& flags = maxwell3d.dirty.flags;
+ if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
+ return;
+ }
+ flags[Dirty::IndexBuffer] = false;
+ last_index_count = index_array.count;
+
+ const GPUVAddr gpu_addr_begin = index_array.StartAddress();
+ const GPUVAddr gpu_addr_end = index_array.EndAddress();
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+ const u32 size = std::min(address_size, draw_size);
+ if (size == 0 || !cpu_addr) {
+ index_buffer = NULL_BINDING;
+ return;
+ }
+ index_buffer = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = FindBuffer(*cpu_addr, size),
+ };
+}
- std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
- std::shared_ptr<Buffer> second) {
- const std::size_t size_1 = first->Size();
- const std::size_t size_2 = second->Size();
- const VAddr first_addr = first->CpuAddr();
- const VAddr second_addr = second->CpuAddr();
- const VAddr new_addr = std::min(first_addr, second_addr);
- const std::size_t new_size = size_1 + size_2;
-
- std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
- new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
- new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
- QueueDestruction(std::move(first));
- QueueDestruction(std::move(second));
+template <class P>
+void BufferCache<P>::UpdateVertexBuffers() {
+ auto& flags = maxwell3d.dirty.flags;
+ if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+ return;
+ }
+ flags[Dirty::VertexBuffers] = false;
- const VAddr cpu_addr_end = new_addr + new_size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- blocks.insert_or_assign(page_start, new_buffer);
- }
- return new_buffer;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ UpdateVertexBuffer(index);
}
+}
- Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
- std::shared_ptr<Buffer> found;
+template <class P>
+void BufferCache<P>::UpdateVertexBuffer(u32 index) {
+ if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+ return;
+ }
+ const auto& array = maxwell3d.regs.vertex_array[index];
+ const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+ const GPUVAddr gpu_addr_begin = array.StartAddress();
+ const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ const u32 size = address_size; // TODO: Analyze stride and number of vertices
+ if (array.enable == 0 || size == 0 || !cpu_addr) {
+ vertex_buffers[index] = NULL_BINDING;
+ return;
+ }
+ vertex_buffers[index] = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = FindBuffer(*cpu_addr, size),
+ };
+}
+
+template <class P>
+void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ Binding& binding = uniform_buffers[stage][index];
+ if (binding.buffer_id) {
+ // Already updated
+ return;
+ }
+ // Mark as dirty
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty_uniform_buffers[stage] |= 1U << index;
+ }
+ // Resolve buffer
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
+ const u32 written_mask = written_storage_buffers[stage];
+ ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+ // Resolve buffer
+ Binding& binding = storage_buffers[stage][index];
+ const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ binding.buffer_id = buffer_id;
+ // Mark buffer as written if needed
+ if (((written_mask >> index) & 1) != 0) {
+ MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
- const VAddr cpu_addr_end = cpu_addr + size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- auto it = blocks.find(page_start);
- if (it == blocks.end()) {
- if (found) {
- found = EnlargeBlock(found);
- continue;
- }
- const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
- found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
- blocks.insert_or_assign(page_start, found);
- continue;
- }
- if (!found) {
- found = it->second;
- continue;
- }
- if (found != it->second) {
- found = MergeBlocks(std::move(found), it->second);
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffers() {
+ if (maxwell3d.regs.tfb_enabled == 0) {
+ return;
+ }
+ for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+ UpdateTransformFeedbackBuffer(index);
+ }
+}
+
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
+ const auto& binding = maxwell3d.regs.tfb_bindings[index];
+ const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
+ const u32 size = binding.buffer_size;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
+ transform_feedback_buffers[index] = NULL_BINDING;
+ return;
+ }
+ const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+ transform_feedback_buffers[index] = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = buffer_id,
+ };
+ MarkWrittenBuffer(buffer_id, *cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeUniformBuffers() {
+ ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ Binding& binding = compute_uniform_buffers[index];
+ binding = NULL_BINDING;
+ const auto& launch_desc = kepler_compute.launch_description;
+ if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
+ const auto& cbuf = launch_desc.const_buffer_config[index];
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+ if (cpu_addr) {
+ binding.cpu_addr = *cpu_addr;
+ binding.size = cbuf.size;
}
}
- return found.get();
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeStorageBuffers() {
+ ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+ // Resolve buffer
+ Binding& binding = compute_storage_buffers[index];
+ const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ binding.buffer_id = buffer_id;
+ // Mark as written if needed
+ if (((written_compute_storage_buffers >> index) & 1) != 0) {
+ MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkRegionAsGpuModified(cpu_addr, size);
+
+ const bool is_accuracy_high = Settings::IsGPULevelHigh();
+ const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+ if (!is_accuracy_high || !is_async) {
+ return;
}
+ if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
+ // Already inserted
+ return;
+ }
+ uncommitted_downloads.push_back(buffer_id);
+}
- void MarkRegionAsWritten(VAddr start, VAddr end) {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
- ++it->second;
- }
+template <class P>
+BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
+ if (cpu_addr == 0) {
+ return NULL_BUFFER_ID;
+ }
+ const u64 page = cpu_addr >> PAGE_BITS;
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ return CreateBuffer(cpu_addr, size);
+ }
+ const Buffer& buffer = slot_buffers[buffer_id];
+ if (buffer.IsInBounds(cpu_addr, size)) {
+ return buffer_id;
+ }
+ return CreateBuffer(cpu_addr, size);
+}
+
+template <class P>
+BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
+ std::vector<BufferId> overlap_ids;
+ VAddr cpu_addr_begin = cpu_addr;
+ VAddr cpu_addr_end = cpu_addr + wanted_size;
+ for (; cpu_addr >> PAGE_BITS < Common::DivCeil(cpu_addr_end, PAGE_SIZE);
+ cpu_addr += PAGE_SIZE) {
+ const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
+ if (!overlap_id) {
+ continue;
+ }
+ Buffer& overlap = slot_buffers[overlap_id];
+ if (overlap.IsPicked()) {
+ continue;
+ }
+ overlap.Pick();
+ overlap_ids.push_back(overlap_id);
+ const VAddr overlap_cpu_addr = overlap.CpuAddr();
+ if (overlap_cpu_addr < cpu_addr_begin) {
+ cpu_addr = cpu_addr_begin = overlap_cpu_addr;
}
+ cpu_addr_end = std::max(cpu_addr_end, overlap_cpu_addr + overlap.SizeBytes());
}
-
- void UnmarkRegionAsWritten(VAddr start, VAddr end) {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- auto it = written_pages.find(page_start);
- if (it != written_pages.end()) {
- if (it->second > 1) {
- --it->second;
- } else {
- written_pages.erase(it);
- }
- }
+ const u32 size = static_cast<u32>(cpu_addr_end - cpu_addr_begin);
+ const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, cpu_addr_begin, size);
+ Buffer& new_buffer = slot_buffers[new_buffer_id];
+
+ for (const BufferId overlap_id : overlap_ids) {
+ Buffer& overlap = slot_buffers[overlap_id];
+ overlap.Unpick();
+
+ std::vector<BufferCopy> copies;
+ const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
+ overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = begin,
+ .dst_offset = dst_base_offset + begin,
+ .size = range_size,
+ });
+ new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
+ new_buffer.MarkRegionAsGpuModified(begin, range_size);
+ });
+ if (!copies.empty()) {
+ runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
+ }
+ ReplaceBufferDownloads(overlap_id, new_buffer_id);
+ DeleteBuffer(overlap_id);
+ }
+ Register(new_buffer_id);
+ return new_buffer_id;
+}
+
+template <class P>
+void BufferCache<P>::Register(BufferId buffer_id) {
+ ChangeRegister<true>(buffer_id);
+}
+
+template <class P>
+void BufferCache<P>::Unregister(BufferId buffer_id) {
+ ChangeRegister<false>(buffer_id);
+}
+
+template <class P>
+template <bool insert>
+void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
+ const Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr cpu_addr_begin = buffer.CpuAddr();
+ const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+ const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
+ const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
+ for (u64 page = page_begin; page != page_end; ++page) {
+ if constexpr (insert) {
+ page_table[page] = buffer_id;
+ } else {
+ page_table[page] = BufferId{};
}
}
+}
- bool IsRegionWritten(VAddr start, VAddr end) const {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- if (written_pages.contains(page_start)) {
- return true;
+template <class P>
+void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
+ if (buffer.CpuAddr() == 0) {
+ return;
+ }
+ SynchronizeBufferImpl(buffer, cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
+ boost::container::small_vector<BufferCopy, 4> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = total_size_bytes,
+ .dst_offset = range_offset,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
+ }
+ const std::span<BufferCopy> copies_span(copies.data(), copies.size());
+ UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
+}
+
+template <class P>
+void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+ std::span<BufferCopy> copies) {
+ if constexpr (USE_MEMORY_MAPS) {
+ MappedUploadMemory(buffer, total_size_bytes, copies);
+ } else {
+ ImmediateUploadMemory(buffer, largest_copy, copies);
+ }
+}
+
+template <class P>
+void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+ std::span<const BufferCopy> copies) {
+ std::span<u8> immediate_buffer;
+ for (const BufferCopy& copy : copies) {
+ std::span<const u8> upload_span;
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+ if (IsRangeGranular(cpu_addr, copy.size)) {
+ upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
+ } else {
+ if (immediate_buffer.empty()) {
+ immediate_buffer = ImmediateBuffer(largest_copy);
}
+ cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+ upload_span = immediate_buffer.subspan(0, copy.size);
}
- return false;
+ buffer.ImmediateUpload(copy.dst_offset, upload_span);
}
-
- void QueueDestruction(std::shared_ptr<Buffer> buffer) {
- buffer->SetEpoch(epoch);
- pending_destruction.push(std::move(buffer));
+}
+
+template <class P>
+void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
+ std::span<const BufferCopy> copies) {
+ auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+ const std::span<u8> staging_pointer = upload_staging.mapped_span;
+ for (const BufferCopy& copy : copies) {
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+ u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+ cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
}
-
- void MarkForAsyncFlush(MapInterval* map) {
- if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+}
+
+template <class P>
+void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
+ const auto scalar_replace = [buffer_id](Binding& binding) {
+ if (binding.buffer_id == buffer_id) {
+ binding.buffer_id = BufferId{};
+ }
+ };
+ const auto replace = [scalar_replace](std::span<Binding> bindings) {
+ std::ranges::for_each(bindings, scalar_replace);
+ };
+ scalar_replace(index_buffer);
+ replace(vertex_buffers);
+ std::ranges::for_each(uniform_buffers, replace);
+ std::ranges::for_each(storage_buffers, replace);
+ replace(transform_feedback_buffers);
+ replace(compute_uniform_buffers);
+ replace(compute_storage_buffers);
+ std::erase(cached_write_buffer_ids, buffer_id);
+
+ // Mark the whole buffer as CPU written to stop tracking CPU writes
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+
+ Unregister(buffer_id);
+ delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+
+ NotifyBufferDeletion();
+}
+
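+// Redirects queued downloads from a replaced buffer to its successor and drops any duplicate
+// entries of the new id introduced by the replacement.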
+template <class P>
+void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
+ const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
+ std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
+ if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
+ buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
}
- uncommitted_flushes->insert(map);
+ };
+ replace(uncommitted_downloads);
+ std::ranges::for_each(committed_downloads, replace);
+}
+
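+// Invalidates cached binding state after a deletion so the next draw rebinds everything:
+// uniform buffers on backends with persistent bindings, plus the index buffer and all vertex
+// buffers through the Maxwell3D dirty flags.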
+template <class P>
+void BufferCache<P>::NotifyBufferDeletion() {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty_uniform_buffers.fill(~u32{0});
}
+ auto& flags = maxwell3d.dirty.flags;
+ flags[Dirty::IndexBuffer] = true;
+ flags[Dirty::VertexBuffers] = true;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ flags[Dirty::VertexBuffer0 + index] = true;
+ }
+ has_deleted_buffers = true;
+}
+
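+// Resolves a storage buffer descriptor read from guest memory. Judging by the two reads below,
+// the layout is presumably: struct SsboDescriptor { u64 gpu_addr; u32 size; };
+// Unmapped addresses and zero-sized buffers resolve to NULL_BINDING.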
+template <class P>
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
+ const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
+ const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr || size == 0) {
+ return NULL_BINDING;
+ }
+ const Binding binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = BufferId{},
+ };
+ return binding;
+}
+
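+// Returns a span with the contents of guest memory, referencing it directly when the range is
+// contiguous in host memory and copying it into the reusable immediate buffer otherwise.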
+template <class P>
+std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
+ u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
+ if (IsRangeGranular(cpu_addr, size) ||
+ base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
+ return std::span(base_pointer, size);
+ } else {
+ const std::span<u8> span = ImmediateBuffer(size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return span;
+ }
+}
- VideoCore::RasterizerInterface& rasterizer;
- Tegra::MemoryManager& gpu_memory;
- Core::Memory::Memory& cpu_memory;
- StreamBuffer& stream_buffer;
-
- u8* buffer_ptr = nullptr;
- u64 buffer_offset = 0;
- u64 buffer_offset_base = 0;
-
- MapIntervalAllocator mapped_addresses_allocator;
- boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
- mapped_addresses;
-
- std::unordered_map<u64, u32> written_pages;
- std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
-
- std::queue<std::shared_ptr<Buffer>> pending_destruction;
- u64 epoch = 0;
- u64 modified_ticks = 0;
-
- std::vector<u8> staging_buffer;
-
- std::list<MapInterval*> marked_for_unregister;
-
- std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
- std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
-
- std::recursive_mutex mutex;
-};
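+// Returns a scratch span for immediate uploads, growing the lazily allocated backing storage
+// whenever a larger capacity is requested.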
+template <class P>
+std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
+ if (wanted_capacity > immediate_buffer_capacity) {
+ immediate_buffer_capacity = wanted_capacity;
+ immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
+ }
+ return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
+}
+
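+// Checks the per-stage bitset of "fast" uniform buffer bindings; only the OpenGL backend tracks
+// these, so other backends always report false.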
+template <class P>
+bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
+ if constexpr (IS_OPENGL) {
+ return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
+ } else {
+ // Only OpenGL has fast uniform buffers
+ return false;
+ }
+}
} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a..000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
- FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
- *new_chunk = std::make_unique<Chunk>();
- FillFreeList(**new_chunk);
- new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
- const std::size_t old_size = free_list.size();
- free_list.resize(old_size + chunk.data.size());
- std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
- [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index ef974b08a..000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
- MapInterval() = default;
-
- /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
- explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
- : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
- bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
- return start <= other_start && other_end <= end;
- }
-
- bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
- return start < other_end && other_start < end;
- }
-
- void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
- is_modified = is_modified_;
- ticks = ticks_;
- }
-
- boost::intrusive::set_member_hook<> member_hook_;
- VAddr start = 0;
- VAddr end = 0;
- GPUVAddr gpu_addr = 0;
- u64 ticks = 0;
- bool is_written = false;
- bool is_modified = false;
- bool is_registered = false;
- bool is_memory_marked = false;
- bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
- constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
- return lhs.start < rhs.start;
- }
-};
-
-class MapIntervalAllocator {
-public:
- MapIntervalAllocator();
- ~MapIntervalAllocator();
-
- MapInterval* Allocate() {
- if (free_list.empty()) {
- AllocateNewChunk();
- }
- MapInterval* const interval = free_list.back();
- free_list.pop_back();
- return interval;
- }
-
- void Release(MapInterval* interval) {
- free_list.push_back(interval);
- }
-
-private:
- struct Chunk {
- std::unique_ptr<Chunk> next;
- std::array<MapInterval, 0x8000> data;
- };
-
- void AllocateNewChunk();
-
- void FillFreeList(Chunk& chunk);
-
- std::vector<MapInterval*> free_list;
-
- Chunk first_chunk;
-
- std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-};
-
-} // namespace VideoCommon