summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp143
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h73
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp142
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h44
4 files changed, 340 insertions, 62 deletions
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index c36ede898..1ba544943 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -1,3 +1,146 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "core/core.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+namespace {
+
+// Usage flags shared by the stream buffer and by every cached buffer block created in this file.
+const auto BufferUsage =
+ vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
+ vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
+
+// Destination pipeline stages of the barriers recorded after uploads and block copies.
+const auto UploadPipelineStage =
+ vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
+ vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+ vk::PipelineStageFlagBits::eComputeShader;
+
+// Destination access mask of the barriers recorded after uploads and block copies.
+const auto UploadAccessBarriers =
+ vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
+ vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
+ vk::AccessFlagBits::eIndexRead;
+
+// Builds the stream buffer handed to the generic buffer cache, using the shared BufferUsage flags.
+auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
+ return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
+}
+
+} // Anonymous namespace
+
+// Creates a Vulkan buffer of `size` bytes for the cached block and commits memory for it.
+// The buffer gets the shared BufferUsage flags plus transfer source/destination usage, which the
+// copy commands recorded elsewhere in this file rely on.
+CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+ CacheAddr cache_addr, std::size_t size)
+ : VideoCommon::BufferBlock{cache_addr, size} {
+ const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
+ BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
+ vk::BufferUsageFlagBits::eTransferDst,
+ vk::SharingMode::eExclusive, 0, nullptr);
+
+ const auto& dld{device.GetDispatchLoader()};
+ const auto dev{device.GetLogical()};
+ buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
+ // NOTE(review): the `false` argument presumably requests non-host-visible memory - confirm
+ // against VKMemoryManager::Commit.
+ buffer.commit = memory_manager.Commit(*buffer.handle, false);
+}
+
+CachedBufferBlock::~CachedBufferBlock() = default;
+
+// Constructs the Vulkan buffer cache: the base class receives a freshly created stream buffer,
+// and references to the device, memory manager, scheduler and staging pool are stored.
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+ : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
+ CreateStreamBuffer(device,
+ scheduler)},
+ device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+ staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+// Allocates a new shared cached buffer block of `size` bytes for `cache_addr`.
+Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
+ return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
+}
+
+// Returns a pointer to the Vulkan handle owned by the given cached block.
+const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
+ return buffer->GetHandle();
+}
+
+/// Returns a buffer from the staging pool with a zero fill recorded over its first `size` bytes.
+/// The fill is recorded on the scheduler after requesting an outside-render-pass context.
+/// @param size Requested size in bytes. It is raised to at least 4 and rounded up to a multiple
+/// of 4, because vkCmdFillBuffer requires a non-zero size that is a multiple of 4.
+/// @returns Pointer to the handle of the staging buffer that receives the fill.
+const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
+    constexpr std::size_t fill_granularity = 4;
+    size = std::max(size, fill_granularity);
+    // Round up so that odd request sizes do not violate the fill size requirement.
+    size = (size + fill_granularity - 1) & ~(fill_granularity - 1);
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
+        cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
+    });
+    return &*empty.handle;
+}
+
+// Uploads `size` bytes from `data` into `buffer` at `offset`.
+// The bytes are first copied into a buffer obtained from the staging pool; a buffer copy and a
+// barrier (transfer write -> UploadAccessBarriers at UploadPipelineStage) are then recorded.
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) {
+ // NOTE(review): the `true` argument presumably requests a host visible buffer - confirm.
+ const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+ std::memcpy(staging.commit->Map(size), data, size);
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+ size](auto cmdbuf, auto& dld) {
+ cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+ VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
+ offset, size)},
+ {}, dld);
+ });
+}
+
+// Reads `size` bytes of `buffer` starting at `offset` back into `data`.
+// A barrier (shader write -> transfer read) and a copy into a staging buffer are recorded, then
+// scheduler.Finish() is called before the staging memory is copied out.
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) {
+ const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+ size](auto cmdbuf, auto& dld) {
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+ vk::PipelineStageFlagBits::eComputeShader,
+ vk::PipelineStageFlagBits::eTransfer, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
+ vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
+ {}, dld);
+ cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
+ });
+ // NOTE(review): presumably blocks until the recorded copy has executed - confirm against
+ // VKScheduler::Finish.
+ scheduler.Finish();
+
+ std::memcpy(data, staging.commit->Map(size), size);
+}
+
+// Records a copy of `size` bytes from `src` at `src_offset` to `dst` at `dst_offset`.
+// Two buffer barriers follow the copy: one on the source range (transfer read -> shader write)
+// and one on the destination range (transfer write -> UploadAccessBarriers).
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
+ dst_offset, size](auto cmdbuf, auto& dld) {
+ cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
+ vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
+ vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+ VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
+ dst_offset, size)},
+ {}, dld);
+ });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index bc6e584cf..3f38eed0c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -3,3 +3,76 @@
// Refer to the license.txt file included.
#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKMemoryManager;
+class VKScheduler;
+
+/// Buffer block of the generic buffer cache backed by a Vulkan buffer.
+class CachedBufferBlock final : public VideoCommon::BufferBlock {
+public:
+ explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+ CacheAddr cache_addr, std::size_t size);
+ ~CachedBufferBlock();
+
+ /// Returns a pointer to the Vulkan buffer handle owned by this block.
+ const vk::Buffer* GetHandle() const {
+ return &*buffer.handle;
+ }
+
+private:
+ VKBuffer buffer; ///< Buffer handle together with its memory commit.
+};
+
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+
+/// Vulkan implementation of the generic buffer cache. It overrides the block creation, upload,
+/// download and copy hooks of VideoCommon::BufferCache.
+class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
+public:
+ explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
+ ~VKBufferCache();
+
+ /// Returns a buffer for which a zero fill of `size` bytes has been recorded.
+ const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
+
+protected:
+ /// Intentionally empty in this backend.
+ void WriteBarrier() override {}
+
+ /// Creates a new cached block of `size` bytes for `cache_addr`.
+ Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+
+ /// Returns a pointer to the Vulkan handle of `buffer`.
+ const vk::Buffer* ToHandle(const Buffer& buffer) override;
+
+ /// Copies `size` bytes from `data` into `buffer` at `offset` through a staging buffer.
+ void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) override;
+
+ /// Copies `size` bytes of `buffer` at `offset` into `data` through a staging buffer.
+ void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) override;
+
+ /// Records a buffer-to-buffer copy of `size` bytes between two cached blocks.
+ void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) override;
+
+private:
+ const VKDevice& device;
+ VKMemoryManager& memory_manager;
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 62f1427f5..d48d3b44c 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,86 +3,144 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <memory>
#include <optional>
+#include <tuple>
#include <vector>
+#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
+namespace {
+
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
- vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
- : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
- pipeline_stage} {
- CreateBuffers(memory_manager, usage);
- ReserveWatches(WATCHES_INITIAL_RESERVE);
+// Fixed size of the stream buffer in bytes (256 MiB).
+constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
+
+/// Finds the first memory type allowed by `filter` whose property flags contain every bit of
+/// `wanted`.
+/// @param device Device whose physical memory properties are queried.
+/// @param filter Bitmask of candidate memory types; bit i enables memory type i.
+/// @param wanted Property flags the memory type has to provide.
+/// @returns Index of the matching memory type, or an empty optional when none matches.
+std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
+                                  vk::MemoryPropertyFlags wanted) {
+    const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
+    for (u32 i = 0; i < properties.memoryTypeCount; i++) {
+        // Shift an unsigned literal: Vulkan allows up to 32 memory types and `1 << 31` would
+        // overflow a signed int.
+        if (!(filter & (1U << i))) {
+            continue;
+        }
+        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
+            return i;
+        }
+    }
+    return {};
+}
+
+} // Anonymous namespace
+
+// Creates the stream buffer with the given usage flags and pre-sizes both watch lists.
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+ vk::BufferUsageFlags usage)
+ : device{device}, scheduler{scheduler} {
+ CreateBuffers(usage);
+ ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
+ ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
-std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
- ASSERT(size <= buffer_size);
+// Maps `size` bytes of the stream buffer for writing.
+// The running offset is aligned up to `alignment` when it is non-zero. If the request does not
+// fit in the remaining space, the offset wraps to zero, the watch lists are swapped and the
+// scheduler is flushed. Returns the mapped pointer, the offset of the mapping and whether the
+// buffer wrapped around during this call.
+// NOTE(review): WaitPendingOperations runs before the wrap-around check and only receives the
+// start offset of the request - confirm that this protects [offset, offset + size).
+std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
+ ASSERT(size <= STREAM_BUFFER_SIZE);
mapped_size = size;
- if (offset + size > buffer_size) {
- // The buffer would overflow, save the amount of used buffers, signal an invalidation and
- // reset the state.
- invalidation_mark = used_watches;
- used_watches = 0;
+ if (alignment > 0) {
+ offset = Common::AlignUp(offset, alignment);
+ }
+
+ WaitPendingOperations(offset);
+
+ bool invalidated = false;
+ if (offset + size > STREAM_BUFFER_SIZE) {
+ // The buffer would overflow, save the amount of used watches and reset the state.
+ invalidation_mark = current_watch_cursor;
+ current_watch_cursor = 0;
offset = 0;
+
+ // Swap watches and reset waiting cursors.
+ std::swap(previous_watches, current_watches);
+ wait_cursor = 0;
+ wait_bound = 0;
+
+ // Ensure that we don't wait for uncommitted fences.
+ scheduler.Flush();
+
+ invalidated = true;
}
- return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
+ return {pointer, offset, invalidated};
}
-void VKStreamBuffer::Send(u64 size) {
+// Unmaps the stream buffer memory, advances the running offset by `size` bytes and records a
+// watch on the scheduler's current fence. `size` must not exceed the size passed to Map.
+void VKStreamBuffer::Unmap(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
- if (invalidation_mark) {
- // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
- scheduler.Flush();
- std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
- [&](auto& resource) { resource->Wait(); });
- invalidation_mark = std::nullopt;
- }
+ const auto dev = device.GetLogical();
+ dev.unmapMemory(*memory, device.GetDispatchLoader());
+
+ offset += size;
- if (used_watches + 1 >= watches.size()) {
+ if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
- ReserveWatches(WATCHES_RESERVE_CHUNK);
+ ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
- // Add a watch for this allocation.
- watches[used_watches++]->Watch(scheduler.GetFence());
-
- offset += size;
+ // Record the end offset of this allocation together with the fence that guards it.
+ auto& watch = current_watches[current_watch_cursor++];
+ watch.upper_bound = offset;
+ watch.fence.Watch(scheduler.GetFence());
}
-void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
- const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
- nullptr);
-
+// Creates the STREAM_BUFFER_SIZE-byte buffer, allocates memory for it and binds the two.
+// The memory type is chosen with FindMemoryType: host visible, host coherent and device local
+// is tried first, then host visible and host coherent only.
+void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
+ const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
+ 0, nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
- commit = memory_manager.Commit(*buffer, true);
- mapped_pointer = commit->GetData();
+
+ const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
+ // Prefer device local host visible allocations (this should hit AMD's pinned memory).
+ auto type = FindMemoryType(device, requirements.memoryTypeBits,
+ vk::MemoryPropertyFlagBits::eHostVisible |
+ vk::MemoryPropertyFlagBits::eHostCoherent |
+ vk::MemoryPropertyFlagBits::eDeviceLocal);
+ if (!type) {
+ // Otherwise search for a host visible allocation.
+ type = FindMemoryType(device, requirements.memoryTypeBits,
+ vk::MemoryPropertyFlagBits::eHostVisible |
+ vk::MemoryPropertyFlagBits::eHostCoherent);
+ ASSERT_MSG(type, "No host visible and coherent memory type found");
+ }
+ const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
+ memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
+
+ dev.bindBufferMemory(*buffer, *memory, 0, dld);
}
-void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
- const std::size_t previous_size = watches.size();
- watches.resize(previous_size + grow_size);
- std::generate(watches.begin() + previous_size, watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
+// Grows `watches` by `grow_size` default-constructed entries.
+void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
+ watches.resize(watches.size() + grow_size);
+}
+
+/// Waits on watches of the previous cycle until the released range reaches
+/// `requested_upper_bound`.
+/// `wait_bound` holds the upper bound of the last watch that was waited on. Watches are consumed
+/// in recording order until that bound covers the request or all watches of the previous cycle
+/// (`invalidation_mark` of them) have been waited on. Does nothing before the first wrap-around.
+/// @param requested_upper_bound Buffer offset up to which previous work has to be finished.
+void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+    if (!invalidation_mark) {
+        return;
+    }
+    // Keep waiting while the request lies beyond the released range. The previous `<` comparison
+    // could never be true because wait_bound is unsigned and starts at zero, so no fence was
+    // ever waited on.
+    while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
+        auto& watch = previous_watches[wait_cursor];
+        wait_bound = watch.upper_bound;
+        watch.fence.Wait();
+        ++wait_cursor;
+    }
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 842e54162..187c0c612 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -4,28 +4,24 @@
#pragma once
-#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKFenceWatch;
-class VKResourceManager;
class VKScheduler;
-class VKStreamBuffer {
+class VKStreamBuffer final {
public:
- explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
- vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+ explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+ vk::BufferUsageFlags usage);
~VKStreamBuffer();
/**
@@ -34,39 +30,47 @@ public:
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that's true when buffer has been invalidated.
*/
- std::tuple<u8*, u64, bool> Reserve(u64 size);
+ std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
- void Send(u64 size);
+ void Unmap(u64 size);
- vk::Buffer GetBuffer() const {
+ vk::Buffer GetHandle() const {
return *buffer;
}
private:
+ struct Watch final {
+ VKFenceWatch fence;
+ u64 upper_bound{};
+ };
+
/// Creates Vulkan buffer handles, committing the required memory.
- void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+ void CreateBuffers(vk::BufferUsageFlags usage);
/// Increases the amount of watches available.
- void ReserveWatches(std::size_t grow_size);
+ void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
+
+ void WaitPendingOperations(u64 requested_upper_bound);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
- const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
- UniqueBuffer buffer; ///< Mapped buffer.
- VKMemoryCommit commit; ///< Memory commit.
- u8* mapped_pointer{}; ///< Pointer to the host visible commit
+ UniqueBuffer buffer; ///< Mapped buffer.
+ UniqueDeviceMemory memory; ///< Memory allocation.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
- std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
- std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
- std::optional<std::size_t>
- invalidation_mark{}; ///< Number of watches used in the current invalidation.
+ std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
+ std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
+ std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
+
+ std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
+ std::size_t wait_cursor{}; ///< Last watch being waited for completion.
+ u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan