diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-07-13 16:16:14 +0200 |
---|---|---|
committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-07-14 16:44:15 +0200 |
commit | 8039be8b195e26b4b4dd4dc6b58e65dfe8eda464 (patch) | |
tree | cf6ef36b74f1ad5a6a4fe71f00895f6b618c4959 | |
parent | DMAEngine: Accelerate BufferClear (diff) | |
download | yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.tar yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.tar.gz yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.tar.bz2 yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.tar.lz yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.tar.xz yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.tar.zst yuzu-8039be8b195e26b4b4dd4dc6b58e65dfe8eda464.zip |
Diffstat (limited to '')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 30 |
1 files changed, 20 insertions, 10 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5f5a59bba..4def8f076 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -329,6 +329,8 @@ private: [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; + void ClearDownload(IntervalType subtract_interval); + VideoCore::RasterizerInterface& rasterizer; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -468,6 +470,14 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { } template <class P> +void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { + uncommitted_ranges.subtract(subtract_interval); + for (auto& interval_set : committed_ranges) { + interval_set.subtract(subtract_interval); + } +} + +template <class P> bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); @@ -481,10 +491,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am } const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; - uncommitted_ranges.subtract(subtract_interval); - for (auto& interval_set : committed_ranges) { - interval_set.subtract(subtract_interval); - } + ClearDownload(subtract_interval); BufferId buffer_a; BufferId buffer_b; @@ -496,7 +503,6 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); - SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount)); std::array copies{BufferCopy{ .src_offset = src_buffer.Offset(*cpu_src_address), .dst_offset = dest_buffer.Offset(*cpu_dest_address), @@ -515,12 +521,17 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am ForEachWrittenRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. common_ranges.subtract(subtract_interval); + bool atleast_1_download = tmp_intervals.size() != 0; for (const IntervalType add_interval : tmp_intervals) { common_ranges.add(add_interval); } + if (dest_buffer.HasCachedWrites()) { + dest_buffer.FlushCachedWrites(); + } runtime.CopyBuffer(dest_buffer, src_buffer, copies); - if (IsRegionGpuModified(*cpu_src_address, amount)) { + dest_buffer.UnmarkRegionAsCpuModified(*cpu_dest_address, amount); + if (atleast_1_download) { dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); } std::vector<u8> tmp_buffer(amount); @@ -541,10 +552,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { } const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; - uncommitted_ranges.subtract(subtract_interval); - for (auto& interval_set : committed_ranges) { - interval_set.subtract(subtract_interval); - } + ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); const size_t size = amount * sizeof(u32); @@ -557,6 +565,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { auto& dest_buffer = slot_buffers[buffer]; const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); runtime.ClearBuffer(dest_buffer, offset, size, value); + dest_buffer.UnmarkRegionAsCpuModified(*cpu_dst_address, size); return true; } @@ -1482,6 +1491,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si const VAddr end_address = start_address + range_size; ForEachWrittenRange(start_address, range_size, add_download); const IntervalType subtract_interval{start_address, end_address}; + ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); }); if (total_size_bytes == 0) { |