diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2021-01-16 20:20:18 +0100 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2021-02-13 06:17:24 +0100 |
commit | 35df1d1864ba721ea7b1cebf9a106dd771cde4f5 (patch) | |
tree | 034a8281294246e2a8eea92d1937607ad00ed428 | |
parent | vulkan_device: Enable robustBufferAccess (diff) | |
download | yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.gz yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.bz2 yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.lz yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.xz yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.zst yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.zip |
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 28 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 21 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 23 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 61 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 142 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 20 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 14 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 9 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 38 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.cpp | 20 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.h | 5 |
15 files changed, 298 insertions, 127 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e4f3c8e35..d6399bf24 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -239,8 +239,7 @@ private: void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, std::span<const BufferCopy> copies); - void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, - std::span<const BufferCopy> copies); + void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); void DeleteBuffer(BufferId buffer_id); @@ -362,11 +361,17 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); const u8* const mapped_memory = download_staging.mapped_span.data(); const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); + for (BufferCopy& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dst_offset += download_staging.offset; + } runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); runtime.Finish(); for (const BufferCopy& copy : copies) { const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; - const u8* copy_mapped_memory = mapped_memory + copy.dst_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* copy_mapped_memory = mapped_memory + dst_offset; cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); } } else { @@ -554,7 +559,9 @@ void BufferCache<P>::PopAsyncFlushes() { } if constexpr (USE_MEMORY_MAPS) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); - for (const auto [copy, buffer_id] : downloads) { + for (auto& [copy, buffer_id] : downloads) { + // Have in mind the staging buffer offset for the copy + copy.dst_offset += download_staging.offset; const std::array copies{copy}; runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); } @@ -562,7 +569,9 @@ void BufferCache<P>::PopAsyncFlushes() { for (const auto [copy, buffer_id] : downloads) { const Buffer& buffer = slot_buffers[buffer_id]; const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; - const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); } } else { @@ -1117,13 +1126,16 @@ void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, template <class P> void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, - std::span<const BufferCopy> copies) { + std::span<BufferCopy> copies) { auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); const std::span<u8> staging_pointer = upload_staging.mapped_span; - for (const BufferCopy& copy : copies) { - const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; + for (BufferCopy& copy : copies) { u8* const src_pointer = staging_pointer.data() + copy.src_offset; + const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); + + // Apply the staging offset + copy.src_offset += upload_staging.offset; } runtime.CopyBuffer(buffer, upload_staging.buffer, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 37572ab28..31eb54123 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -550,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, } void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, - size_t buffer_offset, std::span<const SwizzleParameters> swizzles) { switch (image.info.type) { case ImageType::e2D: - return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + return util_shaders.BlockLinearUpload2D(image, map, swizzles); case ImageType::e3D: - return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + return util_shaders.BlockLinearUpload3D(image, map, swizzles); case ImageType::Linear: - return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + return util_shaders.PitchUpload(image, map, swizzles); default: UNREACHABLE(); break; @@ -710,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); - glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); @@ -729,19 +728,19 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, current_image_height = copy.buffer_image_height; glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); } - CopyBufferToImage(copy, buffer_offset); + CopyBufferToImage(copy, map.offset); } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies) { for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset, + glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, copy.dst_offset, copy.size); } } -void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, +void Image::DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -760,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, current_image_height = copy.buffer_image_height; glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); } - CopyImageToBuffer(copy, buffer_offset); + CopyImageToBuffer(copy, map.offset); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 60d08d6d6..874cf54f4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -35,6 +35,7 @@ struct ImageBufferMap { ~ImageBufferMap(); std::span<u8> mapped_span; + size_t offset = 0; OGLSync* sync; GLuint buffer; }; @@ -78,7 +79,7 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void AccelerateImageUpload(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void InsertUploadMemoryBarrier(); @@ -137,14 +138,12 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, - std::span<const VideoCommon::BufferCopy> copies); + void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, - std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); GLuint Handle() const noexcept { return texture.handle; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index aeb36551c..1b58e8617 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); - glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); @@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); @@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; @@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu "Non-power of two images are not implemented"); program_manager.BindHostCompute(pitch_unswizzle_program.handle); - glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); @@ -167,7 +167,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index bec026bc3..7b1d16b09 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -24,13 +24,13 @@ public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void PitchUpload(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 48fc5d966..4f1e4ec28 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -138,17 +138,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 base_vertex, u32 num_indices, VkBuffer buffer, u32 offset, [[maybe_unused]] u32 size) { - VkIndexType index_type = MaxwellToVK::IndexFormat(index_format); + VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); + VkDeviceSize vk_offset = offset; if (topology == PrimitiveTopology::Quads) { - index_type = VK_INDEX_TYPE_UINT32; - std::tie(buffer, offset) = + vk_index_type = VK_INDEX_TYPE_UINT32; + std::tie(buffer, vk_offset) = quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); - } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { - index_type = VK_INDEX_TYPE_UINT16; - std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset); + } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { + vk_index_type = VK_INDEX_TYPE_UINT16; + std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); } - scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(buffer, offset, index_type); + scheduler.Record([buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, vk_offset, vk_index_type); }); } @@ -251,10 +252,10 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle } } scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut, - size_bytes](vk::CommandBuffer cmdbuf) { + scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, + dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { const VkBufferCopy copy{ - .srcOffset = 0, + .srcOffset = src_offset, .dstOffset = 0, .size = size_bytes, }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a4fdcdf81..2f9a7b028 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,6 +10,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/div_ceil.h" #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -148,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, Uint8Pass::~Uint8Pass() = default; -std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, - u32 src_offset) { +std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, + u32 src_offset) { const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); + update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { - constexpr u32 dispatch_size = 1024; + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); + cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); }); - return {staging.buffer, 0}; + return {staging.buffer, staging.offset}; } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, @@ -194,7 +190,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, QuadIndexedPass::~QuadIndexedPass() = default; -std::pair<VkBuffer, u32> QuadIndexedPass::Assemble( +std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, u32 src_offset) { const u32 index_shift = [index_format] { @@ -217,34 +213,29 @@ std::pair<VkBuffer, u32> QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); - update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); + update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { - static constexpr u32 dispatch_size = 1024; + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; const std::array push_constants = {base_vertex, index_shift}; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); - cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); + cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); }); - return {staging.buffer, 0}; + return {staging.buffer, staging.offset}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 4904019f5..17d781d99 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -50,7 +50,8 @@ public: /// Assemble uint8 indices into an uint16 index buffer /// Returns a pair with the staging buffer, and the offset where the assembled data is - std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset); + std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer, + u32 src_offset); private: VKScheduler& scheduler; @@ -66,9 +67,9 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); - std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, - u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, - u32 src_offset); + std::pair<VkBuffer, VkDeviceSize> Assemble( + Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, + u32 base_vertex, VkBuffer src_buffer, u32 src_offset); private: VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 97fd41cc1..275d740b8 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -8,6 +8,7 @@ #include <fmt/format.h> +#include "common/alignment.h" #include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" @@ -17,14 +18,117 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +// Maximum potential alignment of a Vulkan buffer +constexpr VkDeviceSize MAX_ALIGNMENT = 256; +// Maximum size to put elements in the stream buffer +constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; +// Stream buffer size in bytes +constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; + +constexpr VkMemoryPropertyFlags HOST_FLAGS = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; + +bool IsStreamHeap(VkMemoryHeap heap) noexcept { + return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; +} + +std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, + VkMemoryPropertyFlags flags) noexcept { + for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { + if (((type_mask >> type_index) & 1) == 0) { + // Memory type is incompatible + continue; + } + const VkMemoryType& memory_type = props.memoryTypes[type_index]; + if ((memory_type.propertyFlags & flags) != flags) { + // Memory type doesn't have the flags we want + continue; + } + if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { + // Memory heap is not suitable for streaming + continue; + } + // Success! + return type_index; + } + return std::nullopt; +} + +u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { + // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this + std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); + if (type) { + return *type; + } + // Otherwise try without the DEVICE_LOCAL_BIT + type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); + if (type) { + return *type; + } + // This should never happen, and in case it does, signal it as an out of memory situation + throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); +} + +size_t Region(size_t iterator) noexcept { + return iterator / REGION_SIZE; +} +} // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_) - : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + const vk::Device& dev = device.GetLogical(); + stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = STREAM_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + if (device.HasDebuggingToolAttached()) { + stream_buffer.SetObjectNameEXT("Stream Buffer"); + } + VkMemoryDedicatedRequirements dedicated_reqs{ + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + .pNext = nullptr, + .prefersDedicatedAllocation = VK_FALSE, + .requiresDedicatedAllocation = VK_FALSE, + }; + const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); + const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || + dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; + const VkMemoryDedicatedAllocateInfo dedicated_info{ + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = nullptr, + .image = nullptr, + .buffer = *stream_buffer, + }; + const auto memory_properties = device.GetPhysical().GetMemoryProperties(); + stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = make_dedicated ? &dedicated_info : nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), + }); + if (device.HasDebuggingToolAttached()) { + stream_memory.SetObjectNameEXT("Stream Buffer Memory"); + } + stream_buffer.BindMemory(*stream_memory, 0); + stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); +} StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { + if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { + return GetStreamBuffer(size); + } if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { return *ref; } @@ -39,6 +143,42 @@ void StagingBufferPool::TickFrame() { ReleaseCache(MemoryUsage::Download); } +StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + sync_ticks[region] = scheduler.CurrentTick(); + } + used_iterator = iterator; + + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + scheduler.Wait(sync_ticks[region]); + } + if (iterator + size > free_iterator) { + free_iterator = iterator + size; + } + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + sync_ticks[region] = scheduler.CurrentTick(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + scheduler.Wait(sync_ticks[region]); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return StagingBufferRef{ + .buffer = *stream_buffer, + .offset = static_cast<VkDeviceSize>(offset), + .mapped_span = std::span<u8>(stream_pointer + offset, size), + }; +} + std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, MemoryUsage usage) { StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d42918a47..4ed99c0df 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -19,11 +19,14 @@ class VKScheduler; struct StagingBufferRef { VkBuffer buffer; + VkDeviceSize offset; std::span<u8> mapped_span; }; class StagingBufferPool { public: + static constexpr size_t NUM_SYNCS = 16; + explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler); ~StagingBufferPool(); @@ -33,6 +36,11 @@ public: void TickFrame(); private: + struct StreamBufferCommit { + size_t upper_bound; + u64 tick; + }; + struct StagingBuffer { vk::Buffer buffer; MemoryCommit commit; @@ -42,6 +50,7 @@ private: StagingBufferRef Ref() const noexcept { return { .buffer = *buffer, + .offset = 0, .mapped_span = mapped_span, }; } @@ -56,6 +65,8 @@ private: static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; + StagingBufferRef GetStreamBuffer(size_t size); + std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); @@ -70,6 +81,15 @@ private: MemoryAllocator& memory_allocator; VKScheduler& scheduler; + vk::Buffer stream_buffer; + vk::DeviceMemory stream_memory; + u8* stream_pointer = nullptr; + + size_t iterator = 0; + size_t used_iterator = 0; + size_t free_iterator = 0; + std::array<u64, NUM_SYNCS> sync_ticks{}; + StagingBuffersCache device_local_cache; StagingBuffersCache upload_cache; StagingBuffersCache download_cache; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1eeb45ca9..22a1014a9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -818,11 +818,10 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } -void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, - std::span<const BufferImageCopy> copies) { +void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); const VkBuffer src_buffer = map.buffer; const VkImage vk_image = *image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; @@ -833,11 +832,11 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, }); } -void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, +void Image::UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); + std::vector vk_copies = TransformBufferCopies(copies, map.offset); const VkBuffer src_buffer = map.buffer; const VkBuffer dst_buffer = *buffer; scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { @@ -846,9 +845,8 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, }); } -void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, - std::span<const BufferImageCopy> copies) { - std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); +void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { + std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4558c3297..b08c23459 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -82,7 +82,7 @@ struct TextureCacheRuntime { return false; } - void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t, + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span<const VideoCommon::SwizzleParameters>) { UNREACHABLE(); } @@ -100,13 +100,12 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, + void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, - std::span<const VideoCommon::BufferCopy> copies); + void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, + void DownloadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); [[nodiscard]] VkImage Handle() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f336b705f..b1da69971 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -212,7 +212,7 @@ private: /// Upload data from guest to an image template <typename StagingBuffer> - void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset); + void UploadImageContents(Image& image, StagingBuffer& staging_buffer); /// Find or create an image view from a guest descriptor [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); @@ -592,7 +592,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { Image& image = slot_images[image_id]; auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, 0, copies); + image.DownloadMemory(map, copies); runtime.Finish(); SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } @@ -750,24 +750,24 @@ void TextureCache<P>::PopAsyncFlushes() { total_size_bytes += slot_images[image_id].unswizzled_size_bytes; } auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - size_t buffer_offset = 0; + const size_t original_offset = download_map.offset; for (const ImageId image_id : download_ids) { Image& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, buffer_offset, copies); - buffer_offset += image.unswizzled_size_bytes; + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; } // Wait for downloads to finish runtime.Finish(); - buffer_offset = 0; - const std::span<u8> download_span = download_map.mapped_span; + download_map.offset = original_offset; + std::span<u8> download_span = download_map.mapped_span; for (const ImageId image_id : download_ids) { const ImageBase& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - const std::span<u8> image_download_span = download_span.subspan(buffer_offset); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); - buffer_offset += image.unswizzled_size_bytes; + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); } committed_downloads.pop(); } @@ -798,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } - auto map = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, map, 0); + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); runtime.InsertUploadMemoryBarrier(); } template <class P> -template <typename MapBuffer> -void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { - const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset); +template <typename StagingBuffer> +void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span<u8> mapped_span = staging.mapped_span; const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + runtime.AccelerateImageUpload(image, staging, uploads); } else if (True(image.flags & ImageFlagBits::Converted)) { std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } else if (image.info.type == ImageType::Buffer) { const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d39bbdc70..2aa0ffbe6 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -168,7 +168,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkFreeCommandBuffers); X(vkFreeDescriptorSets); X(vkFreeMemory); - X(vkGetBufferMemoryRequirements); + X(vkGetBufferMemoryRequirements2); X(vkGetDeviceQueue); X(vkGetEventStatus); X(vkGetFenceStatus); @@ -786,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { return DeviceMemory(memory, handle, *dld); } -VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { - VkMemoryRequirements requirements; - dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); - return requirements; +VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer, + void* pnext) const noexcept { + const VkBufferMemoryRequirementsInfo2 info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + .pNext = nullptr, + .buffer = buffer, + }; + VkMemoryRequirements2 requirements{ + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = pnext, + .memoryRequirements{}, + }; + dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements); + return requirements.memoryRequirements; } VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 7f781b081..3e36d356a 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -283,7 +283,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; PFN_vkFreeMemory vkFreeMemory{}; - PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; + PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{}; PFN_vkGetDeviceQueue vkGetDeviceQueue{}; PFN_vkGetEventStatus vkGetEventStatus{}; PFN_vkGetFenceStatus vkGetFenceStatus{}; @@ -871,7 +871,8 @@ public: DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; - VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; + VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer, + void* pnext = nullptr) const noexcept; VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; |