diff options
Diffstat (limited to '')
23 files changed, 398 insertions, 366 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index bf6439530..e9e6f278d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -291,7 +291,7 @@ target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) -target_link_libraries(video_core PRIVATE sirit Vulkan::Headers) +target_link_libraries(video_core PRIVATE sirit Vulkan::Headers vma) if (ENABLE_NSIGHT_AFTERMATH) if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 77128c6e2..ddf28ca28 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -89,8 +89,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window.GetWindowInfo())), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, @@ -173,7 +173,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr return; } const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{ + vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, @@ -196,7 +196,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal); const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -234,8 +233,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }; - const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info); - MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download); + const vk::Buffer dst_buffer = + memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([&](vk::CommandBuffer cmdbuf) { @@ -309,8 +308,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr scheduler.Finish(); // Copy backing image data to the QImage screenshot buffer - const auto dst_memory_map = dst_buffer_memory.Map(); - std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size()); + dst_buffer.Invalidate(); + std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(), + dst_buffer.Mapped().size()); renderer_settings.screenshot_complete_callback(false); renderer_settings.screenshot_requested = false; } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index acb143fc7..ad3b29f0e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -162,7 +162,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, SetUniformData(data, layout); SetVertexData(data, framebuffer, layout); - const std::span<u8> mapped_span = buffer_commit.Map(); + const std::span<u8> mapped_span = buffer.Mapped(); std::memcpy(mapped_span.data(), &data, sizeof(data)); if (!use_accelerated) { @@ -1071,14 +1071,9 @@ void BlitScreen::ReleaseRawImages() { scheduler.Wait(tick); } raw_images.clear(); - raw_buffer_commits.clear(); - aa_image_view.reset(); aa_image.reset(); - aa_commit = MemoryCommit{}; - buffer.reset(); - buffer_commit = MemoryCommit{}; } void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { @@ -1094,20 +1089,18 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer .pQueueFamilyIndices = nullptr, }; - buffer = device.GetLogical().CreateBuffer(ci); - buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload); + buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); } void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); raw_image_views.resize(image_count); - raw_buffer_commits.resize(image_count); const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, u32 down_shift = 0) { u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return device.GetLogical().CreateImage(VkImageCreateInfo{ + return memory_allocator.CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -1130,9 +1123,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); }; - const auto create_commit = [&](vk::Image& image) { - return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - }; const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -1161,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { for (size_t i = 0; i < image_count; ++i) { raw_images[i] = create_image(); - raw_buffer_commits[i] = create_commit(raw_images[i]); raw_image_views[i] = create_image_view(raw_images[i]); } @@ -1169,7 +1158,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { const u32 up_scale = Settings::values.resolution_info.up_scale; const u32 down_shift = Settings::values.resolution_info.down_shift; aa_image = create_image(true, up_scale, down_shift); - aa_commit = create_commit(aa_image); aa_image_view = create_image_view(aa_image, true); VkExtent2D size{ .width = (up_scale * framebuffer.width) >> down_shift, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 68ec20253..8365b5668 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -142,13 +142,11 @@ private: vk::Sampler sampler; vk::Buffer buffer; - MemoryCommit buffer_commit; std::vector<u64> resource_ticks; std::vector<vk::Image> raw_images; std::vector<vk::ImageView> raw_image_views; - std::vector<MemoryCommit> raw_buffer_commits; vk::DescriptorPool aa_descriptor_pool; vk::DescriptorSetLayout aa_descriptor_set_layout; @@ -159,7 +157,6 @@ private: vk::DescriptorSets aa_descriptor_sets; vk::Image aa_image; vk::ImageView aa_image_view; - MemoryCommit aa_commit; u32 raw_width = 0; u32 raw_height = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index f47301ad5..660f7c9ff 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -50,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) { } } -vk::Buffer CreateBuffer(const Device& device, u64 size) { +vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) { VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | @@ -60,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { if (device.IsExtTransformFeedbackSupported()) { flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } - return device.GetLogical().CreateBuffer({ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -69,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); + }; + return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); } } // Anonymous namespace @@ -79,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, - commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { + device{&runtime.device}, buffer{ + CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } @@ -138,7 +139,7 @@ public: const u32 num_first_offset_copies = 4; const size_t bytes_per_index = BytesPerIndex(index_type); const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; - buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -147,14 +148,21 @@ public: .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); + }; + buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT("Quad LUT"); } - memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); - u8* staging_data = staging.mapped_span.data(); + const bool host_visible = buffer.IsHostVisible(); + const StagingBufferRef staging = [&] { + if (host_visible) { + return StagingBufferRef{}; + } + return staging_pool.Request(size_bytes, MemoryUsage::Upload); + }(); + + u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data(); const size_t quad_size = bytes_per_index * 6; for (u32 first = 0; first < num_first_offset_copies; ++first) { @@ -164,29 +172,33 @@ public: } } - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, - dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { - const VkBufferCopy copy{ - .srcOffset = src_offset, - .dstOffset = 0, - .size = size_bytes, - }; - const VkBufferMemoryBarrier write_barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = dst_buffer, - .offset = 0, - .size = size_bytes, - }; - cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); - }); + if (!host_visible) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, + dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { + const VkBufferCopy copy{ + .srcOffset = src_offset, + .dstOffset = 0, + .size = size_bytes, + }; + const VkBufferMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = dst_buffer, + .offset = 0, + .size = size_bytes, + }; + cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); + }); + } else { + buffer.Flush(); + } } void BindBuffer(u32 first) { @@ -587,11 +599,10 @@ void BufferCacheRuntime::ReserveNullBuffer() { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; - null_buffer = device.GetLogical().CreateBuffer(create_info); + null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { null_buffer.SetObjectNameEXT("Null buffer"); } - null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index cdeef8846..95446c732 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -48,7 +48,6 @@ private: const Device* device{}; vk::Buffer buffer; - MemoryCommit commit; std::vector<BufferView> views; }; @@ -142,7 +141,6 @@ private: std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; vk::Buffer null_buffer; - MemoryCommit null_buffer_commit; std::unique_ptr<Uint8Pass> uint8_pass; QuadIndexedPass quad_index_pass; diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index df972cd54..9bcdca2fb 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -205,10 +205,9 @@ void FSR::CreateDescriptorSets() { void FSR::CreateImages() { images.resize(image_count * 2); image_views.resize(image_count * 2); - buffer_commits.resize(image_count * 2); for (size_t i = 0; i < image_count * 2; ++i) { - images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + images[i] = memory_allocator.CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -231,7 +230,6 @@ void FSR::CreateImages() { .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal); image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 5d872861f..8bb9fc23a 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -47,7 +47,6 @@ private: vk::Sampler sampler; std::vector<vk::Image> images; std::vector<vk::ImageView> image_views; - std::vector<MemoryCommit> buffer_commits; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index 10ace0420..d681bd22a 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -181,7 +181,7 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_ frame->height = height; frame->is_srgb = is_srgb; - frame->image = dld.CreateImage({ + frame->image = memory_allocator.CreateImage({ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, @@ -204,8 +204,6 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal); - frame->image_view = dld.CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 4ac2e2395..83e859416 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -29,7 +29,6 @@ struct Frame { vk::Image image; vk::ImageView image_view; vk::Framebuffer framebuffer; - MemoryCommit image_commit; vk::CommandBuffer cmdbuf; vk::Semaphore render_ready; vk::Fence present_done; diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index f8735189d..5efd7d66e 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp @@ -25,9 +25,7 @@ namespace { #define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) -std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, - MemoryAllocator& allocator, - VkExtent2D dimensions, VkFormat format) { +vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { const VkImageCreateInfo image_ci{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, @@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; - - auto image = device.GetLogical().CreateImage(image_ci); - auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal); - - return std::make_pair(std::move(image), std::move(commit)); + return allocator.CreateImage(image_ci); } void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, @@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, vk::Image& image, VkExtent2D dimensions, VkFormat format, std::span<const u8> initial_contents = {}) { - auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo upload_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload); - std::ranges::copy(initial_contents, upload_commit.Map().begin()); + }; + auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload); + std::ranges::copy(initial_contents, upload_buffer.Mapped().begin()); + upload_buffer.Flush(); const std::array<VkBufferImageCopy, 1> regions{{{ .bufferOffset = 0, @@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); }); scheduler.Finish(); - - // This should go out of scope before the commit - auto upload_buffer2 = std::move(upload_buffer); } vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { @@ -531,10 +523,8 @@ void SMAA::CreateImages() { static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; - std::tie(m_static_images[Area], m_static_buffer_commits[Area]) = - CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); - std::tie(m_static_images[Search], m_static_buffer_commits[Search]) = - CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM); + m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); + m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); m_static_image_views[Area] = CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); @@ -544,12 +534,11 @@ void SMAA::CreateImages() { for (u32 i = 0; i < m_image_count; i++) { Images& images = m_dynamic_images.emplace_back(); - std::tie(images.images[Blend], images.buffer_commits[Blend]) = - CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - std::tie(images.images[Edges], images.buffer_commits[Edges]) = - CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); - std::tie(images.images[Output], images.buffer_commits[Output]) = - CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Blend] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); + images.images[Output] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); images.image_views[Blend] = CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h index 99a369148..0e214258a 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.h +++ b/src/video_core/renderer_vulkan/vk_smaa.h @@ -66,13 +66,11 @@ private: std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{}; std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{}; - std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits; std::array<vk::Image, MaxStaticImage> m_static_images{}; std::array<vk::ImageView, MaxStaticImage> m_static_image_views{}; struct Images { vk::DescriptorSets descriptor_sets{}; - std::array<MemoryCommit, MaxDynamicImage> buffer_commits; std::array<vk::Image, MaxDynamicImage> images{}; std::array<vk::ImageView, MaxDynamicImage> image_views{}; std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{}; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 74ca77216..62b251a9b 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; -constexpr VkMemoryPropertyFlags HOST_FLAGS = - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; -constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; - -bool IsStreamHeap(VkMemoryHeap heap) noexcept { - return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; -} - -std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - VkMemoryPropertyFlags flags) noexcept { - for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { - if (((type_mask >> type_index) & 1) == 0) { - // Memory type is incompatible - continue; - } - const VkMemoryType& memory_type = props.memoryTypes[type_index]; - if ((memory_type.propertyFlags & flags) != flags) { - // Memory type doesn't have the flags we want - continue; - } - if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { - // Memory heap is not suitable for streaming - continue; - } - // Success! - return type_index; - } - return std::nullopt; -} - -u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - bool try_device_local) { - std::optional<u32> type; - if (try_device_local) { - // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this - type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); - if (type) { - return *type; - } - } - // Otherwise try without the DEVICE_LOCAL_BIT - type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); - if (type) { - return *type; - } - // This should never happen, and in case it does, signal it as an out of memory situation - throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); -} - size_t Region(size_t iterator) noexcept { return iterator / REGION_SIZE; } @@ -87,8 +38,7 @@ size_t Region(size_t iterator) noexcept { StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { - const vk::Device& dev = device.GetLogical(); - stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo stream_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -99,46 +49,13 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - if (device.HasDebuggingToolAttached()) { - stream_buffer.SetObjectNameEXT("Stream Buffer"); - } - VkMemoryDedicatedRequirements dedicated_reqs{ - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - .pNext = nullptr, - .prefersDedicatedAllocation = VK_FALSE, - .requiresDedicatedAllocation = VK_FALSE, - }; - const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); - const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || - dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; - const VkMemoryDedicatedAllocateInfo dedicated_info{ - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = nullptr, - .image = nullptr, - .buffer = *stream_buffer, }; - const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; - VkMemoryAllocateInfo stream_memory_info{ - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = make_dedicated ? &dedicated_info : nullptr, - .allocationSize = requirements.size, - .memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), - }; - stream_memory = dev.TryAllocateMemory(stream_memory_info); - if (!stream_memory) { - LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); - stream_memory_info.memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); - stream_memory = dev.AllocateMemory(stream_memory_info); - } - + stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream); if (device.HasDebuggingToolAttached()) { - stream_memory.SetObjectNameEXT("Stream Buffer Memory"); + stream_buffer.SetObjectNameEXT("Stream Buffer"); } - stream_buffer.BindMemory(*stream_memory, 0); - stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); + stream_pointer = stream_buffer.Mapped(); + ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!"); } StagingBufferPool::~StagingBufferPool() = default; @@ -199,7 +116,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { return StagingBufferRef{ .buffer = *stream_buffer, .offset = static_cast<VkDeviceSize>(offset), - .mapped_span = std::span<u8>(stream_pointer + offset, size), + .mapped_span = stream_pointer.subspan(offset, size), .usage{}, .log2_level{}, .index{}, @@ -247,7 +164,7 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred) { const u32 log2 = Common::Log2Ceil64(size); - vk::Buffer buffer = device.GetLogical().CreateBuffer({ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -259,17 +176,15 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); + }; + vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage); if (device.HasDebuggingToolAttached()) { ++buffer_index; buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); } - MemoryCommit commit = memory_allocator.Commit(buffer, usage); - const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{}; - + const std::span<u8> mapped_span = buffer.Mapped(); StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{ .buffer = std::move(buffer), - .commit = std::move(commit), .mapped_span = mapped_span, .usage = usage, .log2_level = log2, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 4fd15f11a..5f69f08b1 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -46,7 +46,6 @@ private: struct StagingBuffer { vk::Buffer buffer; - MemoryCommit commit; std::span<u8> mapped_span; MemoryUsage usage; u32 log2_level; @@ -97,8 +96,7 @@ private: Scheduler& scheduler; vk::Buffer stream_buffer; - vk::DeviceMemory stream_memory; - u8* stream_pointer = nullptr; + std::span<u8> stream_pointer; size_t iterator = 0; size_t used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f3cef09dd..ce6acc30c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,7 +15,6 @@ #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -163,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { +[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, + const ImageInfo& info) { if (info.type == ImageType::Buffer) { return vk::Image{}; } - return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); + return allocator.CreateImage(MakeImageCreateInfo(device, info)); } [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { @@ -839,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); - if (buffer_commits[level]) { + if (buffers[level]) { return *buffers[level]; } const auto new_size = Common::NextPow2(needed_size); static constexpr VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; - buffers[level] = device.GetLogical().CreateBuffer({ + const VkBufferCreateInfo temp_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -855,9 +855,8 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - buffer_commits[level] = std::make_unique<MemoryCommit>( - memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); + }; + buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal); return *buffers[level]; } @@ -1266,8 +1265,8 @@ void TextureCacheRuntime::TickFrame() {} Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, - runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), - commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), + runtime{&runtime_}, + original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)), aspect_mask(ImageAspectMask(info.format)) { if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { if (Settings::values.async_astc.GetValue()) { @@ -1468,9 +1467,7 @@ bool Image::ScaleUp(bool ignore) { auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; - scaled_image = MakeImage(runtime->device, scaled_info); - auto& allocator = runtime->memory_allocator; - scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); + scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info); ignore = false; } current_image = *scaled_image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f14525dcb..220943116 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -116,7 +116,6 @@ public: static constexpr size_t indexing_slots = 8 * sizeof(size_t); std::array<vk::Buffer, indexing_slots> buffers{}; - std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; }; class Image : public VideoCommon::ImageBase { @@ -180,12 +179,10 @@ private: TextureCacheRuntime* runtime{}; vk::Image original_image; - MemoryCommit commit; std::vector<vk::ImageView> storage_image_views; VkImageAspectFlags aspect_mask = 0; bool initialized = false; vk::Image scaled_image{}; - MemoryCommit scaled_commit{}; VkImage current_image{}; std::unique_ptr<Framebuffer> scale_framebuffer; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index a802d3c49..460d8d59d 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -18,7 +18,7 @@ using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) #ifndef ANDROID - : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device} #endif { { @@ -41,7 +41,7 @@ void TurboMode::Run(std::stop_token stop_token) { auto& dld = m_device.GetLogical(); // Allocate buffer. 2MiB should be sufficient. - auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -50,10 +50,8 @@ void TurboMode::Run(std::stop_token stop_token) { .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - - // Commit some device local memory for the buffer. - auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + }; + vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); // Create the descriptor pool to contain our descriptor. static constexpr VkDescriptorPoolSize pool_size{ diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index b11abe311..e4ca65b58 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -22,6 +22,8 @@ #include <adrenotools/bcenabler.h> #endif +#include <vk_mem_alloc.h> + namespace Vulkan { using namespace Common::Literals; namespace { @@ -596,9 +598,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + VmaVulkanFunctions functions{}; + functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr; + functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr; + + const VmaAllocatorCreateInfo allocator_info = { + .flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT, + .physicalDevice = physical, + .device = *logical, + .preferredLargeHeapBlockSize = 0, + .pAllocationCallbacks = nullptr, + .pDeviceMemoryCallbacks = nullptr, + .pHeapSizeLimit = nullptr, + .pVulkanFunctions = &functions, + .instance = instance, + .vulkanApiVersion = VK_API_VERSION_1_1, + .pTypeExternalMemoryHandleTypes = nullptr, + }; + + vk::Check(vmaCreateAllocator(&allocator_info, &allocator)); } -Device::~Device() = default; +Device::~Device() { + vmaDestroyAllocator(allocator); +} VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 0b634a876..b84af3dfb 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -14,6 +14,8 @@ #include "common/settings.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +VK_DEFINE_HANDLE(VmaAllocator) + // Define all features which may be used by the implementation here. // Vulkan version in the macro describes the minimum version required for feature availability. // If the Vulkan version is lower than the required version, the named extension is required. @@ -199,6 +201,11 @@ public: return dld; } + /// Returns the VMA allocator. + VmaAllocator GetAllocator() const { + return allocator; + } + /// Returns the logical device. const vk::Device& GetLogical() const { return logical; @@ -630,6 +637,7 @@ private: private: VkInstance instance; ///< Vulkan instance. + VmaAllocator allocator; ///< VMA allocator. vk::DeviceDispatch dld; ///< Device function pointers. vk::PhysicalDevice physical; ///< Physical device. vk::Device logical; ///< Logical device. diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index e28a556f8..a2ef0efa4 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -6,8 +6,6 @@ #include <optional> #include <vector> -#include <glad/glad.h> - #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" @@ -17,6 +15,8 @@ #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include <vk_mem_alloc.h> + namespace Vulkan { namespace { struct Range { @@ -49,22 +49,45 @@ struct Range { case MemoryUsage::Download: return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + case MemoryUsage::Stream: + return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } ASSERT_MSG(false, "Invalid memory usage={}", usage); return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } -constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{ - .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, - .pNext = nullptr, -#ifdef _WIN32 - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, -#elif __unix__ - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, -#else - .handleTypes = 0, -#endif -}; +[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferedVmaFlags(MemoryUsage usage) { + return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + : VkMemoryPropertyFlagBits{}; +} + +[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::Upload: + case MemoryUsage::Stream: + return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + case MemoryUsage::Download: + return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + case MemoryUsage::DeviceLocal: + return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; + } + return {}; +} + +[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + case MemoryUsage::Stream: + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + case MemoryUsage::Upload: + case MemoryUsage::Download: + return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + } + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; +} + } // Anonymous namespace class MemoryAllocation { @@ -74,14 +97,6 @@ public: : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, shifted_memory_type{1U << type} {} -#if defined(_WIN32) || defined(__unix__) - ~MemoryAllocation() { - if (owning_opengl_handle != 0) { - glDeleteMemoryObjectsEXT(1, &owning_opengl_handle); - } - } -#endif - MemoryAllocation& operator=(const MemoryAllocation&) = delete; MemoryAllocation(const MemoryAllocation&) = delete; @@ -120,31 +135,6 @@ public: return memory_mapped_span; } -#ifdef _WIN32 - [[nodiscard]] u32 ExportOpenGLHandle() { - if (!owning_opengl_handle) { - glCreateMemoryObjectsEXT(1, &owning_opengl_handle); - glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size, - GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, - memory.GetMemoryWin32HandleKHR()); - } - return owning_opengl_handle; - } -#elif __unix__ - [[nodiscard]] u32 ExportOpenGLHandle() { - if (!owning_opengl_handle) { - glCreateMemoryObjectsEXT(1, &owning_opengl_handle); - glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, - memory.GetMemoryFdKHR()); - } - return owning_opengl_handle; - } -#else - [[nodiscard]] u32 ExportOpenGLHandle() { - return 0; - } -#endif - /// Returns whether this allocation is compatible with the arguments. [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0; @@ -182,9 +172,6 @@ private: const u32 shifted_memory_type; ///< Shifted Vulkan memory type. std::vector<Range> commits; ///< All commit ranges done from this allocation. std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. -#if defined(_WIN32) || defined(__unix__) - u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle. -#endif }; MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, @@ -216,24 +203,70 @@ std::span<u8> MemoryCommit::Map() { return span; } -u32 MemoryCommit::ExportOpenGLHandle() const { - return allocation->ExportOpenGLHandle(); -} - void MemoryCommit::Release() { if (allocation) { allocation->Free(begin); } } -MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) - : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, - export_allocations{export_allocations_}, +MemoryAllocator::MemoryAllocator(const Device& device_) + : device{device_}, allocator{device.GetAllocator()}, + properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, buffer_image_granularity{ device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} MemoryAllocator::~MemoryAllocator() = default; +vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .preferredFlags = 0, + .memoryTypeBits = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, + }; + + VkImage handle{}; + VmaAllocation allocation{}; + + vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr)); + + return vk::Image(handle, *device.GetLogical(), allocator, allocation, + device.GetDispatchLoader()); +} + +vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | + MemoryUsageVmaFlags(usage), + .usage = MemoryUsageVma(usage), + .requiredFlags = 0, + .preferredFlags = MemoryUsagePreferedVmaFlags(usage), + .memoryTypeBits = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, + }; + + VkBuffer handle{}; + VmaAllocationInfo alloc_info{}; + VmaAllocation allocation{}; + VkMemoryPropertyFlags property_flags{}; + + vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); + vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); + + u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData); + const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{}; + const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent, + device.GetDispatchLoader()); +} + MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { // Find the fastest memory flags we can afford with the current requirements const u32 type_mask = requirements.memoryTypeBits; @@ -253,25 +286,11 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M return TryCommit(requirements, flags).value(); } -MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) { - auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage); - buffer.BindMemory(commit.Memory(), commit.Offset()); - return commit; -} - -MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) { - VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image); - requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity); - auto commit = Commit(requirements, usage); - image.BindMemory(commit.Memory(), commit.Offset()); - return commit; -} - bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { const u32 type = FindType(flags, type_mask).value(); vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr, + .pNext = nullptr, .allocationSize = size, .memoryTypeIndex = type, }); @@ -342,16 +361,4 @@ std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty return std::nullopt; } -bool IsHostVisible(MemoryUsage usage) noexcept { - switch (usage) { - case MemoryUsage::DeviceLocal: - return false; - case MemoryUsage::Upload: - case MemoryUsage::Download: - return true; - } - ASSERT_MSG(false, "Invalid memory usage={}", usage); - return false; -} - } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index a5bff03fe..f449bc8d0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -9,6 +9,8 @@ #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +VK_DEFINE_HANDLE(VmaAllocator) + namespace Vulkan { class Device; @@ -17,9 +19,11 @@ class MemoryAllocation; /// Hints and requirements for the backing memory type of a commit enum class MemoryUsage { - DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU + DeviceLocal, ///< Requests device local host visible buffer, falling back to device local + ///< memory. Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks + Stream, ///< Requests device local host visible buffer, falling back host memory. }; /// Ownership handle of a memory commitment. @@ -41,9 +45,6 @@ public: /// It will map the backing allocation if it hasn't been mapped before. std::span<u8> Map(); - /// Returns an non-owning OpenGL handle, creating one if it doesn't exist. - u32 ExportOpenGLHandle() const; - /// Returns the Vulkan memory handler. VkDeviceMemory Memory() const { return memory; @@ -74,16 +75,19 @@ public: * Construct memory allocator * * @param device_ Device to allocate from - * @param export_allocations_ True when allocations have to be exported * * @throw vk::Exception on failure */ - explicit MemoryAllocator(const Device& device_, bool export_allocations_); + explicit MemoryAllocator(const Device& device_); ~MemoryAllocator(); MemoryAllocator& operator=(const MemoryAllocator&) = delete; MemoryAllocator(const MemoryAllocator&) = delete; + vk::Image CreateImage(const VkImageCreateInfo& ci) const; + + vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const; + /** * Commits a memory with the specified requirements. * @@ -97,9 +101,6 @@ public: /// Commits memory required by the buffer and binds it. MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage); - /// Commits memory required by the image and binds it. - MemoryCommit Commit(const vk::Image& image, MemoryUsage usage); - private: /// Tries to allocate a chunk of memory. bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); @@ -117,15 +118,12 @@ private: /// Returns index to the fastest memory type compatible with the passed requirements. std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; - const Device& device; ///< Device handle. - const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. - const bool export_allocations; ///< True when memory allocations have to be exported. + const Device& device; ///< Device handle. + VmaAllocator allocator; ///< Vma allocator. + const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers // and optimal images }; -/// Returns true when a memory usage is guaranteed to be host visible. -bool IsHostVisible(MemoryUsage usage) noexcept; - } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 336f53700..28fcb21a0 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -12,6 +12,8 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" +#include <vk_mem_alloc.h> + namespace Vulkan::vk { namespace { @@ -547,24 +549,40 @@ DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( return DebugUtilsMessenger(object, handle, *dld); } -void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { - Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); +void Image::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); } -void Buffer::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +void Image::Release() const noexcept { + if (handle) { + vmaDestroyImage(allocator, handle, allocation); + } } -void BufferView::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); +void Buffer::Flush() const { + if (!is_coherent) { + vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE); + } } -void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { - Check(dld->vkBindImageMemory(owner, handle, memory, offset)); +void Buffer::Invalidate() const { + if (!is_coherent) { + vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE); + } } -void Image::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); +void Buffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +} + +void Buffer::Release() const noexcept { + if (handle) { + vmaDestroyBuffer(allocator, handle, allocation); + } +} + +void BufferView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); } void ImageView::SetObjectNameEXT(const char* name) const { @@ -701,24 +719,12 @@ Queue Device::GetQueue(u32 family_index) const noexcept { return Queue(queue, *dld); } -Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const { - VkBuffer object; - Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); - return Buffer(object, handle, *dld); -} - BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { VkBufferView object; Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); return BufferView(object, handle, *dld); } -Image Device::CreateImage(const VkImageCreateInfo& ci) const { - VkImage object; - Check(dld->vkCreateImage(handle, &ci, nullptr, &object)); - return Image(object, handle, *dld); -} - ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { VkImageView object; Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 4ff328a21..44fce47a5 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -32,6 +32,9 @@ #pragma warning(disable : 26812) // Disable prefer enum class over enum #endif +VK_DEFINE_HANDLE(VmaAllocator) +VK_DEFINE_HANDLE(VmaAllocation) + namespace Vulkan::vk { /** @@ -616,6 +619,138 @@ public: } }; +class Image { +public: + explicit Image(VkImage handle_, VkDevice owner_, VmaAllocator allocator_, + VmaAllocation allocation_, const DeviceDispatch& dld_) noexcept + : handle{handle_}, owner{owner_}, allocator{allocator_}, + allocation{allocation_}, dld{&dld_} {} + Image() = default; + + Image(const Image&) = delete; + Image& operator=(const Image&) = delete; + + Image(Image&& rhs) noexcept + : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator}, + allocation{rhs.allocation}, dld{rhs.dld} {} + + Image& operator=(Image&& rhs) noexcept { + Release(); + handle = std::exchange(rhs.handle, nullptr); + owner = rhs.owner; + allocator = rhs.allocator; + allocation = rhs.allocation; + dld = rhs.dld; + return *this; + } + + ~Image() noexcept { + Release(); + } + + VkImage operator*() const noexcept { + return handle; + } + + void reset() noexcept { + Release(); + handle = nullptr; + } + + explicit operator bool() const noexcept { + return handle != nullptr; + } + + void SetObjectNameEXT(const char* name) const; + +private: + void Release() const noexcept; + + VkImage handle = nullptr; + VkDevice owner = nullptr; + VmaAllocator allocator = nullptr; + VmaAllocation allocation = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +class Buffer { +public: + explicit Buffer(VkBuffer handle_, VkDevice owner_, VmaAllocator allocator_, + VmaAllocation allocation_, std::span<u8> mapped_, bool is_coherent_, + const DeviceDispatch& dld_) noexcept + : handle{handle_}, owner{owner_}, allocator{allocator_}, + allocation{allocation_}, mapped{mapped_}, is_coherent{is_coherent_}, dld{&dld_} {} + Buffer() = default; + + Buffer(const Buffer&) = delete; + Buffer& operator=(const Buffer&) = delete; + + Buffer(Buffer&& rhs) noexcept + : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator}, + allocation{rhs.allocation}, mapped{rhs.mapped}, + is_coherent{rhs.is_coherent}, dld{rhs.dld} {} + + Buffer& operator=(Buffer&& rhs) noexcept { + Release(); + handle = std::exchange(rhs.handle, nullptr); + owner = rhs.owner; + allocator = rhs.allocator; + allocation = rhs.allocation; + mapped = rhs.mapped; + is_coherent = rhs.is_coherent; + dld = rhs.dld; + return *this; + } + + ~Buffer() noexcept { + Release(); + } + + VkBuffer operator*() const noexcept { + return handle; + } + + void reset() noexcept { + Release(); + handle = nullptr; + } + + explicit operator bool() const noexcept { + return handle != nullptr; + } + + /// Returns the host mapped memory, an empty span otherwise. + std::span<u8> Mapped() noexcept { + return mapped; + } + + std::span<const u8> Mapped() const noexcept { + return mapped; + } + + /// Returns true if the buffer is mapped to the host. + bool IsHostVisible() const noexcept { + return !mapped.empty(); + } + + void Flush() const; + + void Invalidate() const; + + void SetObjectNameEXT(const char* name) const; + +private: + void Release() const noexcept; + + VkBuffer handle = nullptr; + VkDevice owner = nullptr; + VmaAllocator allocator = nullptr; + VmaAllocation allocation = nullptr; + std::span<u8> mapped = {}; + bool is_coherent = false; + const DeviceDispatch* dld = nullptr; +}; + class Queue { public: /// Construct an empty queue handle. @@ -639,17 +774,6 @@ private: const DeviceDispatch* dld = nullptr; }; -class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { - using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle; - -public: - /// Attaches a memory allocation. - void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; - - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; @@ -658,17 +782,6 @@ public: void SetObjectNameEXT(const char* name) const; }; -class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { - using Handle<VkImage, VkDevice, DeviceDispatch>::Handle; - -public: - /// Attaches a memory allocation. - void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; - - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; @@ -840,12 +953,8 @@ public: Queue GetQueue(u32 family_index) const noexcept; - Buffer CreateBuffer(const VkBufferCreateInfo& ci) const; - BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; - Image CreateImage(const VkImageCreateInfo& ci) const; - ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; Semaphore CreateSemaphore() const; |