summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 165
1 files changed, 115 insertions, 50 deletions
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1cfb4c2ff..660f7c9ff 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,7 +7,6 @@
#include <span>
#include <vector>
-#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -51,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
}
}
-vk::Buffer CreateBuffer(const Device& device, u64 size) {
+vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
@@ -61,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
- return device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -70,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
} // Anonymous namespace
@@ -80,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
- device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
- commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
+ device{&runtime.device}, buffer{
+ CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@@ -139,7 +139,7 @@ public:
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
- buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -148,14 +148,21 @@ public:
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT("Quad LUT");
}
- memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
- const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
- u8* staging_data = staging.mapped_span.data();
+ const bool host_visible = buffer.IsHostVisible();
+ const StagingBufferRef staging = [&] {
+ if (host_visible) {
+ return StagingBufferRef{};
+ }
+ return staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ }();
+
+ u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@@ -165,29 +172,33 @@ public:
}
}
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
- dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
- const VkBufferCopy copy{
- .srcOffset = src_offset,
- .dstOffset = 0,
- .size = size_bytes,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = dst_buffer,
- .offset = 0,
- .size = size_bytes,
- };
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
- });
+ if (!host_visible) {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+ dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = src_offset,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+ });
+ } else {
+ buffer.Flush();
+ }
}
void BindBuffer(u32 first) {
@@ -238,7 +249,7 @@ private:
return indices;
}
- void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+ void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
@@ -278,7 +289,7 @@ private:
return indices;
}
- void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+ void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
@@ -298,12 +309,18 @@ private:
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ GuestDescriptorQueue& guest_descriptor_queue_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
- staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
- uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {
+ staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
+ quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
+ compute_pass_descriptor_queue) {
+ if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+ // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
+ uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
+ compute_pass_descriptor_queue);
+ }
quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
scheduler_, staging_pool_);
quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
@@ -314,8 +331,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Upload);
}
-StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
- return staging_pool.Request(size, MemoryUsage::Download);
+StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+ return staging_pool.Request(size, MemoryUsage::Download, deferred);
+}
+
+void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
+ staging_pool.FreeDeferred(ref);
}
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
@@ -330,10 +351,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
-u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
- return static_cast<u32>(device.GetStorageBufferAlignment());
-}
-
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
@@ -356,7 +373,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
- boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
@@ -440,7 +457,9 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
topology == PrimitiveTopology::QuadStrip);
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
vk_index_type = VK_INDEX_TYPE_UINT16;
- std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+ if (uint8_pass) {
+ std::tie(vk_buffer, vk_offset) = uint8_pass->Assemble(num_indices, buffer, offset);
+ }
}
if (vk_buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
@@ -494,6 +513,36 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
}
}
+void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
+ boost::container::small_vector<VkBuffer, 32> buffer_handles;
+ for (u32 index = 0; index < bindings.buffers.size(); ++index) {
+ auto handle = bindings.buffers[index]->Handle();
+ if (handle == VK_NULL_HANDLE) {
+ bindings.offsets[index] = 0;
+ bindings.sizes[index] = VK_WHOLE_SIZE;
+ if (!device.HasNullDescriptor()) {
+ ReserveNullBuffer();
+ handle = *null_buffer;
+ }
+ }
+ buffer_handles.push_back(handle);
+ }
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers2EXT(
+ bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
+ bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data());
+ });
+ } else {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index,
+ buffer_handles.data(), bindings.offsets.data());
+ });
+ }
+}
+
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
u32 size) {
if (!device.IsExtTransformFeedbackSupported()) {
@@ -515,6 +564,23 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
+void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
+ if (!device.IsExtTransformFeedbackSupported()) {
+ // Already logged in the rasterizer
+ return;
+ }
+ boost::container::small_vector<VkBuffer, 4> buffer_handles;
+ for (u32 index = 0; index < bindings.buffers.size(); ++index) {
+ buffer_handles.push_back(bindings.buffers[index]->Handle());
+ }
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
+ buffer_handles.data(), bindings.offsets.data(),
+ bindings.sizes.data());
+ });
+}
+
void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
@@ -533,11 +599,10 @@ void BufferCacheRuntime::ReserveNullBuffer() {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
- null_buffer = device.GetLogical().CreateBuffer(create_info);
+ null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
}
- null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {