diff options
Diffstat (limited to 'src/video_core')
31 files changed, 1181 insertions, 610 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 25a4b1c5b..bb1f8491f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(host_shaders) add_library(video_core STATIC + buffer_cache/buffer_base.h buffer_cache/buffer_block.h buffer_cache/buffer_cache.h buffer_cache/map_interval.cpp @@ -135,8 +136,6 @@ add_library(video_core STATIC renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h - renderer_vulkan/vk_memory_manager.cpp - renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h renderer_vulkan/vk_query_cache.cpp @@ -259,6 +258,8 @@ add_library(video_core STATIC vulkan_common/vulkan_instance.h vulkan_common/vulkan_library.cpp vulkan_common/vulkan_library.h + vulkan_common/vulkan_memory_allocator.cpp + vulkan_common/vulkan_memory_allocator.h vulkan_common/vulkan_surface.cpp vulkan_common/vulkan_surface.h vulkan_common/vulkan_wrapper.cpp @@ -287,10 +288,10 @@ target_link_libraries(video_core PRIVATE sirit) if (ENABLE_NSIGHT_AFTERMATH) if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) - message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided") + message(FATAL_ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided") endif() if (NOT WIN32) - message(ERROR "Nsight Aftermath doesn't support non-Windows platforms") + message(FATAL_ERROR "Nsight Aftermath doesn't support non-Windows platforms") endif() target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH) target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include") diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h new file mode 100644 index 000000000..ee8602ce9 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_base.h @@ -0,0 +1,495 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <bit> +#include <limits> +#include <utility> + +#include "common/alignment.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "core/memory.h" + +namespace VideoCommon { + +enum class BufferFlagBits { + Picked = 1 << 0, +}; +DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) + +/// Tag for creating null buffers with no storage or size +struct NullBufferParams {}; + +/** + * Range tracking buffer container. + * + * It keeps track of the modified CPU and GPU ranges on a CPU page granularity, notifying the given + * rasterizer about state changes in the tracking behavior of the buffer. + * + * The buffer size and address is forcefully aligned to CPU page boundaries. + */ +template <class RasterizerInterface> +class BufferBase { + static constexpr u64 PAGES_PER_WORD = 64; + static constexpr u64 BYTES_PER_PAGE = Core::Memory::PAGE_SIZE; + static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; + + /// Vector tracking modified pages tightly packed with small vector optimization + union WrittenWords { + /// Returns the pointer to the words state + [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { + return is_short ? &stack : heap; + } + + /// Returns the pointer to the words state + [[nodiscard]] u64* Pointer(bool is_short) noexcept { + return is_short ? &stack : heap; + } + + u64 stack = 0; ///< Small buffers storage + u64* heap; ///< Not-small buffers pointer to the storage + }; + + struct GpuCpuWords { + explicit GpuCpuWords() = default; + explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { + if (IsShort()) { + cpu.stack = ~u64{0}; + gpu.stack = 0; + } else { + // Share allocation between CPU and GPU pages and set their default values + const size_t num_words = NumWords(); + u64* const alloc = new u64[num_words * 2]; + cpu.heap = alloc; + gpu.heap = alloc + num_words; + std::fill_n(cpu.heap, num_words, ~u64{0}); + std::fill_n(gpu.heap, num_words, 0); + } + // Clean up tailing bits + const u64 last_local_page = + Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); + const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; + u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; + last_word = (last_word << shift) >> shift; + } + + ~GpuCpuWords() { + Release(); + } + + GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { + Release(); + size_bytes = rhs.size_bytes; + cpu = rhs.cpu; + gpu = rhs.gpu; + rhs.cpu.heap = nullptr; + return *this; + } + + GpuCpuWords(GpuCpuWords&& rhs) noexcept + : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { + rhs.cpu.heap = nullptr; + } + + GpuCpuWords& operator=(const GpuCpuWords&) = delete; + GpuCpuWords(const GpuCpuWords&) = delete; + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return size_bytes <= BYTES_PER_WORD; + } + + /// Returns the number of words of the buffer + [[nodiscard]] size_t NumWords() const noexcept { + return Common::DivCeil(size_bytes, BYTES_PER_WORD); + } + + /// Release buffer resources + void Release() { + if (!IsShort()) { + // CPU written words is the base for the heap allocation + delete[] cpu.heap; + } + } + + u64 size_bytes = 0; + WrittenWords cpu; + WrittenWords gpu; + }; + +public: + explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) + : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, + words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} + + explicit BufferBase(NullBufferParams) {} + + BufferBase& operator=(const BufferBase&) = delete; + BufferBase(const BufferBase&) = delete; + + /// Returns the inclusive CPU modified range in a begin end pair + [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, + u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return ModifiedRegion<false>(offset, query_size); + } + + /// Returns the inclusive GPU modified range in a begin end pair + [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, + u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return ModifiedRegion<true>(offset, query_size); + } + + /// Returns true if a region has been modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return IsRegionModified<false>(offset, query_size); + } + + /// Returns true if a region has been modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return IsRegionModified<true>(offset, query_size); + } + + /// Mark region as CPU modified, notifying the rasterizer about this change + void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { + ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); + } + + /// Unmark region as CPU modified, notifying the rasterizer about this change + void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { + ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); + } + + /// Mark region as modified from the host GPU + void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { + ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); + } + + /// Unmark region as modified from the host GPU + void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { + ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); + } + + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified + template <typename Func> + void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { + ForEachModifiedRange<false, true>(query_cpu_range, size, func); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template <typename Func> + void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { + ForEachModifiedRange<true, false>(query_cpu_range, size, func); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template <typename Func> + void ForEachDownloadRange(Func&& func) { + ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); + } + + /// Mark buffer as picked + void Pick() noexcept { + flags |= BufferFlagBits::Picked; + } + + /// Unmark buffer as picked + void Unpick() noexcept { + flags &= ~BufferFlagBits::Picked; + } + + /// Returns true when vaddr -> vaddr+size is fully contained in the buffer + [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { + return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); + } + + /// Returns true if the buffer has been marked as picked + [[nodiscard]] bool IsPicked() const noexcept { + return True(flags & BufferFlagBits::Picked); + } + + /// Returns the base CPU address of the buffer + [[nodiscard]] VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + /// Returns the offset relative to the given CPU address + /// @pre IsInBounds returns true + [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { + return static_cast<u32>(other_cpu_addr - cpu_addr); + } + + /// Returns the size in bytes of the buffer + [[nodiscard]] u64 SizeBytes() const noexcept { + return words.size_bytes; + } + +private: + /** + * Change the state of a range of pages + * + * @param written_words Pages to be marked or unmarked as modified + * @param dirty_addr Base address to mark or unmark as modified + * @param size Size in bytes to mark or unmark as modified + * + * @tparam enable True when the bits will be set to one, false for zero + * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes + */ + template <bool enable, bool notify_rasterizer> + void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, + s64 size) noexcept(!notify_rasterizer) { + const s64 difference = dirty_addr - cpu_addr; + const u64 offset = std::max<s64>(difference, 0); + size += std::min<s64>(difference, 0); + if (offset >= SizeBytes() || size < 0) { + return; + } + u64* const state_words = written_words.Pointer(IsShort()); + const u64 offset_end = std::min(offset + size, SizeBytes()); + const u64 begin_page_index = offset / BYTES_PER_PAGE; + const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; + const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE); + const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD); + u64 page_index = begin_page_index % PAGES_PER_WORD; + u64 word_index = begin_word_index; + while (word_index < end_word_index) { + const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD; + const u64 left_offset = + std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD; + const u64 right_offset = page_index; + u64 bits = ~u64{0}; + bits = (bits >> right_offset) << right_offset; + bits = (bits << left_offset) >> left_offset; + if constexpr (notify_rasterizer) { + NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); + } + if constexpr (enable) { + state_words[word_index] |= bits; + } else { + state_words[word_index] &= ~bits; + } + page_index = 0; + ++word_index; + } + } + + /** + * Notify rasterizer about changes in the CPU tracking state of a word in the buffer + * + * @param word_index Index to the word to notify to the rasterizer + * @param current_bits Current state of the word + * @param new_bits New state of the word + * + * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages + */ + template <bool add_to_rasterizer> + void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { + u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; + VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; + while (changed_bits != 0) { + const int empty_bits = std::countr_zero(changed_bits); + addr += empty_bits * BYTES_PER_PAGE; + changed_bits >>= empty_bits; + + const u32 continuous_bits = std::countr_one(changed_bits); + const u64 size = continuous_bits * BYTES_PER_PAGE; + const VAddr begin_addr = addr; + addr += size; + changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0; + rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1); + } + } + + /** + * Loop over each page in the given range, turn off those bits and notify the rasterizer if + * needed. Call the given function on each turned off range. + * + * @param query_cpu_range Base CPU address to loop over + * @param size Size in bytes of the CPU range to loop over + * @param func Function to call for each turned off region + * + * @tparam gpu True for host GPU pages, false for CPU pages + * @tparam notify_rasterizer True when the rasterizer should be notified about state changes + */ + template <bool gpu, bool notify_rasterizer, typename Func> + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + const s64 difference = query_cpu_range - cpu_addr; + const u64 query_begin = std::max<s64>(difference, 0); + size += std::min<s64>(difference, 0); + if (query_begin >= SizeBytes() || size < 0) { + return; + } + const u64* const cpu_words = words.cpu.Pointer(IsShort()); + const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); + u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; + u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); + + const auto modified = [](u64 word) { return word != 0; }; + const auto first_modified_word = std::find_if(words_begin, words_end, modified); + if (first_modified_word == words_end) { + // Exit early when the buffer is not modified + return; + } + const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified); + + const u64 word_index_begin = std::distance(state_words, first_modified_word); + const u64 word_index_end = std::distance(state_words, last_modified_word); + + const unsigned local_page_begin = std::countr_zero(*first_modified_word); + const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); + const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; + const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; + const u64 query_page_begin = query_begin / BYTES_PER_PAGE; + const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE); + const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin); + const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end); + const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD; + const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1; + + u64 page_begin = first_word_page_begin; + u64 current_base = 0; + u64 current_size = 0; + bool on_going = false; + for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) { + const bool is_last_word = word_index + 1 == word_index_end; + const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD; + const u64 right_offset = page_begin; + const u64 left_offset = PAGES_PER_WORD - page_end; + u64 bits = ~u64{0}; + bits = (bits >> right_offset) << right_offset; + bits = (bits << left_offset) >> left_offset; + + const u64 current_word = state_words[word_index] & bits; + state_words[word_index] &= ~bits; + + // Exclude CPU modified pages when visiting GPU pages + const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); + if constexpr (notify_rasterizer) { + NotifyRasterizer<true>(word_index, word, ~u64{0}); + } + u64 page = page_begin; + page_begin = 0; + + while (page < page_end) { + const int empty_bits = std::countr_zero(word >> page); + if (on_going && empty_bits != 0) { + InvokeModifiedRange(func, current_size, current_base); + current_size = 0; + on_going = false; + } + page += empty_bits; + + const int continuous_bits = std::countr_one(word >> page); + if (!on_going && continuous_bits != 0) { + current_base = word_index * PAGES_PER_WORD + page; + on_going = true; + } + current_size += continuous_bits; + page += continuous_bits; + } + } + if (on_going && current_size > 0) { + InvokeModifiedRange(func, current_size, current_base); + } + } + + template <typename Func> + void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) { + const u64 current_size_bytes = current_size * BYTES_PER_PAGE; + const u64 offset_begin = current_base * BYTES_PER_PAGE; + const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes()); + func(offset_begin, offset_end - offset_begin); + } + + /** + * Returns true when a region has been modified + * + * @param offset Offset in bytes from the start of the buffer + * @param size Size in bytes of the region to query for modifications + */ + template <bool gpu> + [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { + const u64* const cpu_words = words.cpu.Pointer(IsShort()); + const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + const u64 num_query_words = size / BYTES_PER_WORD + 1; + const u64 word_begin = offset / BYTES_PER_WORD; + const u64 word_end = std::min(word_begin + num_query_words, NumWords()); + const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); + u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; + for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { + const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + if (word == 0) { + continue; + } + const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit); + const u64 local_page_end = page_end % PAGES_PER_WORD; + const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD; + if (((word >> page_index) << page_index) << page_end_shift != 0) { + return true; + } + } + return false; + } + + /** + * Returns a begin end pair with the inclusive modified region + * + * @param offset Offset in bytes from the start of the buffer + * @param size Size in bytes of the region to query for modifications + * + * @tparam gpu True to query GPU modified pages, false for CPU pages + */ + template <bool gpu> + [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { + const u64* const cpu_words = words.cpu.Pointer(IsShort()); + const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + const u64 num_query_words = size / BYTES_PER_WORD + 1; + const u64 word_begin = offset / BYTES_PER_WORD; + const u64 word_end = std::min(word_begin + num_query_words, NumWords()); + const u64 page_base = offset / BYTES_PER_PAGE; + const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); + u64 begin = std::numeric_limits<u64>::max(); + u64 end = 0; + for (u64 word_index = word_begin; word_index < word_end; ++word_index) { + const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + if (word == 0) { + continue; + } + const u64 local_page_begin = std::countr_zero(word); + const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); + const u64 page_index = word_index * PAGES_PER_WORD; + const u64 page_begin = std::max(page_index + local_page_begin, page_base); + const u64 page_end = std::min(page_index + local_page_end, page_limit); + begin = std::min(begin, page_begin); + end = std::max(end, page_end); + } + static constexpr std::pair<u64, u64> EMPTY{0, 0}; + return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; + } + + /// Returns the number of words of the buffer + [[nodiscard]] size_t NumWords() const noexcept { + return words.NumWords(); + } + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return words.IsShort(); + } + + RasterizerInterface* rasterizer = nullptr; + VAddr cpu_addr = 0; + GpuCpuWords words; + BufferFlagBits flags{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 4c7399d5a..73f331d4c 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -55,7 +55,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) OUTPUT ${SPIRV_HEADER_FILE} COMMAND - ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + ${GLSLANGVALIDATOR} -V --quiet ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} MAIN_DEPENDENCY ${SOURCE_FILE} ) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d7437e185..61796e33a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -23,7 +23,6 @@ #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -32,6 +31,7 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_library.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_surface.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -137,7 +137,7 @@ bool RendererVulkan::Init() try { InitializeDevice(); Report(); - memory_manager = std::make_unique<VKMemoryManager>(*device); + memory_allocator = std::make_unique<MemoryAllocator>(*device); state_tracker = std::make_unique<StateTracker>(gpu); @@ -149,11 +149,11 @@ bool RendererVulkan::Init() try { rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, *device, - *memory_manager, *state_tracker, *scheduler); + *memory_allocator, *state_tracker, *scheduler); blit_screen = std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, - *memory_manager, *swapchain, *scheduler, screen_info); + *memory_allocator, *swapchain, *scheduler, screen_info); return true; } catch (const vk::Exception& exception) { @@ -172,7 +172,7 @@ void RendererVulkan::ShutDown() { blit_screen.reset(); scheduler.reset(); swapchain.reset(); - memory_manager.reset(); + memory_allocator.reset(); device.reset(); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 5575ffc54..daf55b9b4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -29,8 +29,8 @@ namespace Vulkan { class Device; class StateTracker; +class MemoryAllocator; class VKBlitScreen; -class VKMemoryManager; class VKSwapchain; class VKScheduler; @@ -75,7 +75,7 @@ private: vk::DebugUtilsMessenger debug_callback; std::unique_ptr<Device> device; - std::unique_ptr<VKMemoryManager> memory_manager; + std::unique_ptr<MemoryAllocator> memory_allocator; std::unique_ptr<StateTracker> state_tracker; std::unique_ptr<VKScheduler> scheduler; std::unique_ptr<VKSwapchain> swapchain; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 5e184eb42..3e3b895e0 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -22,13 +22,13 @@ #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" #include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -115,10 +115,10 @@ struct VKBlitScreen::BufferData { VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, VideoCore::RasterizerInterface& rasterizer_, const Device& device_, - VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, + MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, VKScheduler& scheduler_, const VKScreenInfo& screen_info_) : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, - device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_}, + device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { resource_ticks.resize(image_count); @@ -150,8 +150,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool SetUniformData(data, framebuffer); SetVertexData(data, framebuffer); - auto map = buffer_commit->Map(); - std::memcpy(map.Address(), &data, sizeof(data)); + const std::span<u8> map = buffer_commit.Map(); + std::memcpy(map.data(), &data, sizeof(data)); if (!use_accelerated) { const u64 image_offset = GetRawImageOffset(framebuffer, image_index); @@ -165,8 +165,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool constexpr u32 block_height_log2 = 4; const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); Tegra::Texture::UnswizzleTexture( - std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), - bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, 0); const VkBufferImageCopy copy{ .bufferOffset = image_offset, @@ -224,8 +224,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); }); } - map.Release(); - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, @@ -642,7 +640,7 @@ void VKBlitScreen::ReleaseRawImages() { raw_images.clear(); raw_buffer_commits.clear(); buffer.reset(); - buffer_commit.reset(); + buffer_commit = MemoryCommit{}; } void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { @@ -659,7 +657,7 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff }; buffer = device.GetLogical().CreateBuffer(ci); - buffer_commit = memory_manager.Commit(buffer, true); + buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload); } void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { @@ -690,7 +688,7 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); + raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal); raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 69ed61770..b52576957 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -6,7 +6,7 @@ #include <memory> -#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { @@ -43,7 +43,7 @@ public: explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, VideoCore::RasterizerInterface& rasterizer, const Device& device, - VKMemoryManager& memory_manager, VKSwapchain& swapchain, + MemoryAllocator& memory_allocator, VKSwapchain& swapchain, VKScheduler& scheduler, const VKScreenInfo& screen_info); ~VKBlitScreen(); @@ -86,7 +86,7 @@ private: Core::Frontend::EmuWindow& render_window; VideoCore::RasterizerInterface& rasterizer; const Device& device; - VKMemoryManager& memory_manager; + MemoryAllocator& memory_allocator; VKSwapchain& swapchain; VKScheduler& scheduler; const std::size_t image_count; @@ -104,14 +104,14 @@ private: vk::Sampler sampler; vk::Buffer buffer; - VKMemoryCommit buffer_commit; + MemoryCommit buffer_commit; std::vector<u64> resource_ticks; std::vector<vk::Semaphore> semaphores; std::vector<vk::Image> raw_images; std::vector<vk::ImageView> raw_image_views; - std::vector<VKMemoryCommit> raw_buffer_commits; + std::vector<MemoryCommit> raw_buffer_commits; u32 raw_width = 0; u32 raw_height = 0; }; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 58c710344..d8ad40a0f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -36,11 +36,11 @@ constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = } // Anonymous namespace -Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) +Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, + StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ staging_pool_} { - const VkBufferCreateInfo ci{ + buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -49,22 +49,20 @@ Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKSchedul .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }; - - buffer.handle = device.GetLogical().CreateBuffer(ci); - buffer.commit = memory_manager.Commit(buffer.handle, false); + }); + commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); } Buffer::~Buffer() = default; void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { - const auto& staging = staging_pool.GetUnusedBuffer(data_size, true); - std::memcpy(staging.commit->Map(data_size), data, data_size); + const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); + std::memcpy(staging.mapped_span.data(), data, data_size); scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); - scheduler.Record([staging = *staging.handle, handle, offset, data_size, + scheduler.Record([staging = staging.buffer, handle, offset, data_size, &device = device](vk::CommandBuffer cmdbuf) { const VkBufferMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, @@ -100,12 +98,12 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { } void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { - const auto& staging = staging_pool.GetUnusedBuffer(data_size, true); + auto staging = staging_pool.Request(data_size, MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); scheduler.Record( - [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { + [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) { const VkBufferMemoryBarrier barrier{ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .pNext = nullptr, @@ -126,7 +124,7 @@ void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { }); scheduler.Finish(); - std::memcpy(data, staging.commit->Map(data_size), data_size); + std::memcpy(data, staging.mapped_span.data(), data_size); } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, @@ -164,29 +162,29 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, VKMemoryManager& memory_manager_, + const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, - VKStagingBufferPool& staging_pool_) + StagingBufferPool& staging_pool_) : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, - device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ - staging_pool_} {} + device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, + staging_pool{staging_pool_} {} VKBufferCache::~VKBufferCache() = default; std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr, + return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, size); } VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { size = std::max(size, std::size_t(4)); - const auto& empty = staging_pool.GetUnusedBuffer(size, false); + const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { + scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, size, 0); }); - return {*empty.handle, 0, 0}; + return {empty.buffer, 0, 0}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 1c39aed34..41d577510 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -8,21 +8,20 @@ #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKMemoryManager; class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: - explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); + explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, + StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); ~Buffer(); void Upload(std::size_t offset, std::size_t data_size, const u8* data); @@ -33,7 +32,7 @@ public: std::size_t copy_size); VkBuffer Handle() const { - return *buffer.handle; + return *buffer; } u64 Address() const { @@ -43,18 +42,19 @@ public: private: const Device& device; VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + StagingBufferPool& staging_pool; - VKBuffer buffer; + vk::Buffer buffer; + MemoryCommit commit; }; class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device, VKMemoryManager& memory_manager, + const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, VKStreamBuffer& stream_buffer, - VKStagingBufferPool& staging_pool); + StagingBufferPool& staging_pool); ~VKBufferCache(); BufferInfo GetEmptyBuffer(std::size_t size) override; @@ -64,9 +64,9 @@ protected: private: const Device& device; - VKMemoryManager& memory_manager; + MemoryAllocator& memory_allocator; VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + StagingBufferPool& staging_pool; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 02a6d54b7..5eb6a54be 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -164,7 +164,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKStagingBufferPool& staging_buffer_pool_, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), @@ -177,18 +177,18 @@ QuadArrayPass::~QuadArrayPass() = default; std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { const u32 num_triangle_vertices = (num_vertices / 4) * 6; const std::size_t staging_size = num_triangle_vertices * sizeof(u32); - auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); + const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); ASSERT(num_vertices % 4 == 0); const u32 num_quads = num_vertices / 4; - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads, - first, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, + num_quads, first, set](vk::CommandBuffer cmdbuf) { constexpr u32 dispatch_size = 1024; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); @@ -208,11 +208,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); }); - return {*buffer.handle, 0}; + return {staging_ref.buffer, 0}; } Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, + VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), @@ -224,15 +224,15 @@ Uint8Pass::~Uint8Pass() = default; std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset) { const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); - auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); + const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { constexpr u32 dispatch_size = 1024; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); @@ -252,12 +252,12 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); }); - return {*buffer.handle, 0}; + return {staging_ref.buffer, 0}; } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKStagingBufferPool& staging_buffer_pool_, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), @@ -286,15 +286,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( const u32 num_tri_vertices = (num_vertices / 4) * 6; const std::size_t staging_size = num_tri_vertices * sizeof(u32); - auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); + const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); - update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 dispatch_size = 1024; const std::array push_constants = {base_vertex, index_shift}; @@ -317,7 +317,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); }); - return {*buffer.handle, 0}; + return {staging_ref.buffer, 0}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 7ddb09afb..f5c6f5f17 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -16,8 +16,8 @@ namespace Vulkan { class Device; +class StagingBufferPool; class VKScheduler; -class VKStagingBufferPool; class VKUpdateDescriptorQueue; class VKComputePass { @@ -45,7 +45,7 @@ class QuadArrayPass final : public VKComputePass { public: explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKStagingBufferPool& staging_buffer_pool_, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadArrayPass(); @@ -53,15 +53,14 @@ public: private: VKScheduler& scheduler; - VKStagingBufferPool& staging_buffer_pool; + StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; }; class Uint8Pass final : public VKComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKStagingBufferPool& staging_buffer_pool_, + VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -69,7 +68,7 @@ public: private: VKScheduler& scheduler; - VKStagingBufferPool& staging_buffer_pool; + StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; }; @@ -77,7 +76,7 @@ class QuadIndexedPass final : public VKComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKStagingBufferPool& staging_buffer_pool_, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -87,7 +86,7 @@ public: private: VKScheduler& scheduler; - VKStagingBufferPool& staging_buffer_pool; + StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; }; diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp deleted file mode 100644 index a6abd0eee..000000000 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <optional> -#include <tuple> -#include <vector> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -namespace { - -u64 GetAllocationChunkSize(u64 required_size) { - static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; - auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size); - return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20); -} - -} // Anonymous namespace - -class VKMemoryAllocation final { -public: - explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_, - VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) - : device{device_}, memory{std::move(memory_)}, properties{properties_}, - allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} - - VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { - auto found = TryFindFreeSection(free_iterator, allocation_size, - static_cast<u64>(commit_size), static_cast<u64>(alignment)); - if (!found) { - found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), - static_cast<u64>(alignment)); - if (!found) { - // Signal out of memory, it'll try to do more allocations. - return nullptr; - } - } - auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found, - *found + commit_size); - commits.push_back(commit.get()); - - // Last commit's address is highly probable to be free. - free_iterator = *found + commit_size; - - return commit; - } - - void Free(const VKMemoryCommitImpl* commit) { - ASSERT(commit); - - const auto it = std::find(std::begin(commits), std::end(commits), commit); - if (it == commits.end()) { - UNREACHABLE_MSG("Freeing unallocated commit!"); - return; - } - commits.erase(it); - } - - /// Returns whether this allocation is compatible with the arguments. - bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const { - return (wanted_properties & properties) && (type_mask & shifted_type) != 0; - } - -private: - static constexpr u32 ShiftType(u32 type) { - return 1U << type; - } - - /// A memory allocator, it may return a free region between "start" and "end" with the solicited - /// requirements. - std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { - u64 iterator = Common::AlignUp(start, alignment); - while (iterator + size <= end) { - const u64 try_left = iterator; - const u64 try_right = try_left + size; - - bool overlap = false; - for (const auto& commit : commits) { - const auto [commit_left, commit_right] = commit->interval; - if (try_left < commit_right && commit_left < try_right) { - // There's an overlap, continue the search where the overlapping commit ends. - iterator = Common::AlignUp(commit_right, alignment); - overlap = true; - break; - } - } - if (!overlap) { - // A free address has been found. - return try_left; - } - } - - // No free regions where found, return an empty optional. - return std::nullopt; - } - - const Device& device; ///< Vulkan device. - const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. - const VkMemoryPropertyFlags properties; ///< Vulkan properties. - const u64 allocation_size; ///< Size of this allocation. - const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. - - /// Hints where the next free region is likely going to be. - u64 free_iterator{}; - - /// Stores all commits done from this allocation. - std::vector<const VKMemoryCommitImpl*> commits; -}; - -VKMemoryManager::VKMemoryManager(const Device& device_) - : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} - -VKMemoryManager::~VKMemoryManager() = default; - -VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements, - bool host_visible) { - const u64 chunk_size = GetAllocationChunkSize(requirements.size); - - // When a host visible commit is asked, search for host visible and coherent, otherwise search - // for a fast device local type. - const VkMemoryPropertyFlags wanted_properties = - host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT - : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - - if (auto commit = TryAllocCommit(requirements, wanted_properties)) { - return commit; - } - - // Commit has failed, allocate more memory. - if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) { - // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory. - // Allocation has failed, panic. - UNREACHABLE_MSG("Ran out of VRAM!"); - return {}; - } - - // Commit again, this time it won't fail since there's a fresh allocation above. If it does, - // there's a bug. - auto commit = TryAllocCommit(requirements, wanted_properties); - ASSERT(commit); - return commit; -} - -VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) { - auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible); - buffer.BindMemory(commit->GetMemory(), commit->GetOffset()); - return commit; -} - -VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) { - auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible); - image.BindMemory(commit->GetMemory(), commit->GetOffset()); - return commit; -} - -bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, - u64 size) { - const u32 type = [&] { - for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { - const auto flags = properties.memoryTypes[type_index].propertyFlags; - if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { - // The type matches in type and in the wanted properties. - return type_index; - } - } - UNREACHABLE_MSG("Couldn't find a compatible memory type!"); - return 0U; - }(); - - // Try to allocate found type. - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = nullptr, - .allocationSize = size, - .memoryTypeIndex = type, - }); - if (!memory) { - LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); - return false; - } - - allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory), - wanted_properties, size, type)); - return true; -} - -VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements, - VkMemoryPropertyFlags wanted_properties) { - for (auto& allocation : allocations) { - if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { - continue; - } - if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { - return commit; - } - } - return {}; -} - -VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, - const vk::DeviceMemory& memory_, u64 begin_, u64 end_) - : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} - -VKMemoryCommitImpl::~VKMemoryCommitImpl() { - allocation->Free(this); -} - -MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { - return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size)); -} - -void VKMemoryCommitImpl::Unmap() const { - memory.Unmap(); -} - -MemoryMap VKMemoryCommitImpl::Map() const { - return Map(interval.second - interval.first); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h deleted file mode 100644 index 2452bca4e..000000000 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <span> -#include <utility> -#include <vector> -#include "common/common_types.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Device; -class MemoryMap; -class VKMemoryAllocation; -class VKMemoryCommitImpl; - -using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; - -class VKMemoryManager final { -public: - explicit VKMemoryManager(const Device& device_); - VKMemoryManager(const VKMemoryManager&) = delete; - ~VKMemoryManager(); - - /** - * Commits a memory with the specified requeriments. - * @param requirements Requirements returned from a Vulkan call. - * @param host_visible Signals the allocator that it *must* use host visible and coherent - * memory. When passing false, it will try to allocate device local memory. - * @returns A memory commit. - */ - VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible); - - /// Commits memory required by the buffer and binds it. - VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible); - - /// Commits memory required by the image and binds it. - VKMemoryCommit Commit(const vk::Image& image, bool host_visible); - -private: - /// Allocates a chunk of memory. - bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); - - /// Tries to allocate a memory commit. - VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, - VkMemoryPropertyFlags wanted_properties); - - const Device& device; ///< Device handler. - const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. - std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. -}; - -class VKMemoryCommitImpl final { - friend VKMemoryAllocation; - friend MemoryMap; - -public: - explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, - const vk::DeviceMemory& memory_, u64 begin_, u64 end_); - ~VKMemoryCommitImpl(); - - /// Maps a memory region and returns a pointer to it. - /// It's illegal to have more than one memory map at the same time. - MemoryMap Map(u64 size, u64 offset = 0) const; - - /// Maps the whole commit and returns a pointer to it. - /// It's illegal to have more than one memory map at the same time. - MemoryMap Map() const; - - /// Returns the Vulkan memory handler. - VkDeviceMemory GetMemory() const { - return *memory; - } - - /// Returns the start position of the commit relative to the allocation. - VkDeviceSize GetOffset() const { - return static_cast<VkDeviceSize>(interval.first); - } - -private: - /// Unmaps memory. - void Unmap() const; - - const Device& device; ///< Vulkan device. - const vk::DeviceMemory& memory; ///< Vulkan device memory handler. - std::pair<u64, u64> interval{}; ///< Interval where the commit exists. - VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. -}; - -/// Holds ownership of a memory map. -class MemoryMap final { -public: - explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_) - : commit{commit_}, span{span_} {} - - ~MemoryMap() { - if (commit) { - commit->Unmap(); - } - } - - /// Prematurely releases the memory map. - void Release() { - commit->Unmap(); - commit = nullptr; - } - - /// Returns a span to the memory map. - [[nodiscard]] std::span<u8> Span() const noexcept { - return span; - } - - /// Returns the address of the memory map. - [[nodiscard]] u8* Address() const noexcept { - return span.data(); - } - - /// Returns the address of the memory map; - [[nodiscard]] operator u8*() const noexcept { - return span.data(); - } - -private: - const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. - std::span<u8> span; ///< Address to the mapped memory. -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 02282e36f..8991505ca 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -355,14 +355,12 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { SPIRVProgram program; std::vector<VkDescriptorSetLayoutBinding> bindings; - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); - // Skip stages that are not enabled if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); @@ -372,12 +370,8 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { const auto& entries = shader->GetEntries(); program[stage] = { Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries}; - - if (program_enum == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } + entries, + }; const u32 old_binding = specialization.base_binding; specialization.base_binding = diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ce3db49bd..f0a111829 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -409,24 +409,24 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, - const Device& device_, VKMemoryManager& memory_manager_, + const Device& device_, MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, VKScheduler& scheduler_) : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, - screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, + screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), - staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), + staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), update_descriptor_queue(device, scheduler), blit_image(device, scheduler, state_tracker, descriptor_pool), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, + texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue), - buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, - staging_pool), + buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler, + stream_buffer, staging_pool), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { @@ -1445,7 +1445,7 @@ VkBuffer RasterizerVulkan::DefaultBuffer() { .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }); - default_buffer_commit = memory_manager.Commit(default_buffer, false); + default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4695718e9..8e261b9bd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -30,6 +29,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader/async_shaders.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { @@ -56,7 +56,7 @@ public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, const Device& device_, - VKMemoryManager& memory_manager_, StateTracker& state_tracker_, + MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, VKScheduler& scheduler_); ~RasterizerVulkan() override; @@ -213,12 +213,12 @@ private: VKScreenInfo& screen_info; const Device& device; - VKMemoryManager& memory_manager; + MemoryAllocator& memory_allocator; StateTracker& state_tracker; VKScheduler& scheduler; VKStreamBuffer stream_buffer; - VKStagingBufferPool staging_pool; + StagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; @@ -234,7 +234,7 @@ private: VKFenceManager fence_manager; vk::Buffer default_buffer; - VKMemoryCommit default_buffer_commit; + MemoryCommit default_buffer_commit; vk::Event wfi_event; VideoCommon::Shader::AsyncShaders async_shaders; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 89cbe01ad..61d52b961 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1334,7 +1334,10 @@ private: } if (const auto comment = std::get_if<CommentNode>(&*node)) { - Name(OpUndef(t_void), comment->GetText()); + if (device.HasDebuggingToolAttached()) { + // We should insert comments with OpString instead of using named variables + Name(OpUndef(t_int), comment->GetText()); + } return {}; } diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 1e0b8b922..97fd41cc1 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -3,10 +3,12 @@ // Refer to the license.txt file included. #include <algorithm> -#include <unordered_map> #include <utility> #include <vector> +#include <fmt/format.h> + +#include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -16,45 +18,51 @@ namespace Vulkan { -VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) - : buffer{std::move(buffer_)} {} - -VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_) - : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} +StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, + VKScheduler& scheduler_) + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} -VKStagingBufferPool::~VKStagingBufferPool() = default; +StagingBufferPool::~StagingBufferPool() = default; -VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) { - if (const auto buffer = TryGetReservedBuffer(size, host_visible)) { - return *buffer; +StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { + if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { + return *ref; } - return CreateStagingBuffer(size, host_visible); + return CreateStagingBuffer(size, usage); } -void VKStagingBufferPool::TickFrame() { - current_delete_level = (current_delete_level + 1) % NumLevels; +void StagingBufferPool::TickFrame() { + current_delete_level = (current_delete_level + 1) % NUM_LEVELS; - ReleaseCache(true); - ReleaseCache(false); + ReleaseCache(MemoryUsage::DeviceLocal); + ReleaseCache(MemoryUsage::Upload); + ReleaseCache(MemoryUsage::Download); } -VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) { - for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) { - if (!scheduler.IsFree(entry.tick)) { - continue; +std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, + MemoryUsage usage) { + StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; + + const auto is_free = [this](const StagingBuffer& entry) { + return scheduler.IsFree(entry.tick); + }; + auto& entries = cache_level.entries; + const auto hint_it = entries.begin() + cache_level.iterate_index; + auto it = std::find_if(entries.begin() + cache_level.iterate_index, entries.end(), is_free); + if (it == entries.end()) { + it = std::find_if(entries.begin(), hint_it, is_free); + if (it == hint_it) { + return std::nullopt; } - entry.tick = scheduler.CurrentTick(); - return &*entry.buffer; } - return nullptr; + cache_level.iterate_index = std::distance(entries.begin(), it) + 1; + it->tick = scheduler.CurrentTick(); + return it->Ref(); } -VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { +StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) { const u32 log2 = Common::Log2Ceil64(size); - - auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = device.GetLogical().CreateBuffer({ + vk::Buffer buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -66,49 +74,63 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }); - buffer->commit = memory_manager.Commit(buffer->handle, host_visible); - - std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries; - StagingBuffer& entry = entries.emplace_back(std::move(buffer)); - entry.tick = scheduler.CurrentTick(); - return *entry.buffer; -} - -VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) { - return host_visible ? host_staging_buffers : device_staging_buffers; + if (device.HasDebuggingToolAttached()) { + ++buffer_index; + buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); + } + MemoryCommit commit = memory_allocator.Commit(buffer, usage); + const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{}; + + StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{ + .buffer = std::move(buffer), + .commit = std::move(commit), + .mapped_span = mapped_span, + .tick = scheduler.CurrentTick(), + }); + return entry.Ref(); } -void VKStagingBufferPool::ReleaseCache(bool host_visible) { - auto& cache = GetCache(host_visible); - const u64 size = ReleaseLevel(cache, current_delete_level); - if (size == 0) { - return; +StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + return device_local_cache; + case MemoryUsage::Upload: + return upload_cache; + case MemoryUsage::Download: + return download_cache; + default: + UNREACHABLE_MSG("Invalid memory usage={}", usage); + return upload_cache; } } -u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { - static constexpr std::size_t deletions_per_tick = 16; +void StagingBufferPool::ReleaseCache(MemoryUsage usage) { + ReleaseLevel(GetCache(usage), current_delete_level); +} +void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) { + constexpr size_t deletions_per_tick = 16; auto& staging = cache[log2]; auto& entries = staging.entries; - const std::size_t old_size = entries.size(); + const size_t old_size = entries.size(); const auto is_deleteable = [this](const StagingBuffer& entry) { return scheduler.IsFree(entry.tick); }; - const std::size_t begin_offset = staging.delete_index; - const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size); - const auto begin = std::begin(entries) + begin_offset; - const auto end = std::begin(entries) + end_offset; + const size_t begin_offset = staging.delete_index; + const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size); + const auto begin = entries.begin() + begin_offset; + const auto end = entries.begin() + end_offset; entries.erase(std::remove_if(begin, end, is_deleteable), end); - const std::size_t new_size = entries.size(); + const size_t new_size = entries.size(); staging.delete_index += deletions_per_tick; if (staging.delete_index >= new_size) { staging.delete_index = 0; } - - return (1ULL << log2) * (old_size - new_size); + if (staging.iterate_index > new_size) { + staging.iterate_index = 0; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 90dadcbbe..d42918a47 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -9,7 +9,7 @@ #include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -17,55 +17,65 @@ namespace Vulkan { class Device; class VKScheduler; -struct VKBuffer final { - vk::Buffer handle; - VKMemoryCommit commit; +struct StagingBufferRef { + VkBuffer buffer; + std::span<u8> mapped_span; }; -class VKStagingBufferPool final { +class StagingBufferPool { public: - explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler); - ~VKStagingBufferPool(); + explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, + VKScheduler& scheduler); + ~StagingBufferPool(); - VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible); + StagingBufferRef Request(size_t size, MemoryUsage usage); void TickFrame(); private: - struct StagingBuffer final { - explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer); - - std::unique_ptr<VKBuffer> buffer; + struct StagingBuffer { + vk::Buffer buffer; + MemoryCommit commit; + std::span<u8> mapped_span; u64 tick = 0; + + StagingBufferRef Ref() const noexcept { + return { + .buffer = *buffer, + .mapped_span = mapped_span, + }; + } }; - struct StagingBuffers final { + struct StagingBuffers { std::vector<StagingBuffer> entries; - std::size_t delete_index = 0; + size_t delete_index = 0; + size_t iterate_index = 0; }; - static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT; - using StagingBuffersCache = std::array<StagingBuffers, NumLevels>; + static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; + using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; - VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible); + std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); - VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible); + StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); - StagingBuffersCache& GetCache(bool host_visible); + StagingBuffersCache& GetCache(MemoryUsage usage); - void ReleaseCache(bool host_visible); + void ReleaseCache(MemoryUsage usage); - u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); + void ReleaseLevel(StagingBuffersCache& cache, size_t log2); const Device& device; - VKMemoryManager& memory_manager; + MemoryAllocator& memory_allocator; VKScheduler& scheduler; - StagingBuffersCache host_staging_buffers; - StagingBuffersCache device_staging_buffers; + StagingBuffersCache device_local_cache; + StagingBuffersCache upload_cache; + StagingBuffersCache download_cache; - std::size_t current_delete_level = 0; + size_t current_delete_level = 0; + u64 buffer_index = 0; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0f9e93e48..aa7c5d7c6 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -10,12 +10,12 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -570,10 +570,18 @@ void TextureCacheRuntime::Finish() { } ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { - const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); - return ImageBufferMap{ - .handle = *buffer.handle, - .map = buffer.commit->Map(size), + const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); + return { + .handle = staging_ref.buffer, + .span = staging_ref.mapped_span, + }; +} + +ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { + const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); + return { + .handle = staging_ref.buffer, + .span = staging_ref.mapped_span, }; } @@ -804,9 +812,9 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), aspect_mask(ImageAspectMask(info.format)) { if (image) { - commit = runtime.memory_manager.Commit(image, false); + commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); } else { - commit = runtime.memory_manager.Commit(buffer, false); + commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { flags |= VideoCommon::ImageFlagBits::Converted; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 92a7aad8b..a55d405d1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,8 +7,8 @@ #include <compare> #include <span> -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -19,14 +19,13 @@ using VideoCommon::Offset2D; using VideoCommon::RenderTargets; using VideoCore::Surface::PixelFormat; -class VKScheduler; -class VKStagingBufferPool; - class BlitImageHelper; class Device; class Image; class ImageView; class Framebuffer; +class StagingBufferPool; +class VKScheduler; struct RenderPassKey { constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; @@ -60,18 +59,18 @@ struct ImageBufferMap { } [[nodiscard]] std::span<u8> Span() const noexcept { - return map.Span(); + return span; } VkBuffer handle; - MemoryMap map; + std::span<u8> span; }; struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; - VKMemoryManager& memory_manager; - VKStagingBufferPool& staging_buffer_pool; + MemoryAllocator& memory_allocator; + StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache; @@ -79,10 +78,7 @@ struct TextureCacheRuntime { [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); - [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { - // TODO: Have a special function for this - return MapUploadBuffer(size); - } + [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const std::array<Offset2D, 2>& dst_region, @@ -141,7 +137,7 @@ private: VKScheduler* scheduler; vk::Image image; vk::Buffer buffer; - VKMemoryCommit commit; + MemoryCommit commit; VkImageAspectFlags aspect_mask = 0; bool initialized = false; }; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index b54d33763..c9840b75e 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -465,6 +465,14 @@ public: return operands.size(); } + NodeBlock& GetOperands() { + return operands; + } + + const NodeBlock& GetOperands() const { + return operands; + } + [[nodiscard]] const Node& operator[](std::size_t operand_index) const { return operands.at(operand_index); } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index a4987ffc6..625a5eb46 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -388,9 +388,54 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_ if (!sets_cc) { return; } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); + switch (value->index()) { + case 0: // Operation Node + SearchOperands(bb, value); + break; + case 2: // General Purpose Node + if (const auto* gpr = std::get_if<GprNode>(value.get())) { + LOG_DEBUG(HW_GPU, "GprNode: index={}", gpr->GetIndex()); + Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), + Immediate(gpr->GetIndex())); + SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); + } + break; + + default: + Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); + SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); + LOG_WARNING(HW_GPU, "Node Type: {}", value->index()); + break; + } +} + +void ShaderIR::SearchOperands(NodeBlock& nb, Node var) { + const auto* op = std::get_if<OperationNode>(var.get()); + if (op == nullptr) { + return; + } + + if (op->GetOperandsCount() == 0) { + return; + } + + for (auto& operand : op->GetOperands()) { + switch (operand->index()) { + case 0: // Operation Node + return SearchOperands(nb, operand); + case 2: // General Purpose Node + if (const auto* gpr = std::get_if<GprNode>(operand.get())) { + LOG_DEBUG(HW_GPU, "Child GprNode: index={}", gpr->GetIndex()); + Node zerop = Operation(OperationCode::LogicalIEqual, std::move(operand), + Immediate(gpr->GetIndex())); + SetInternalFlag(nb, InternalFlag::Zero, std::move(zerop)); + } + break; + default: + LOG_WARNING(HW_GPU, "Child Node Type: {}", operand->index()); + break; + } + } } Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0c6ab0f07..0afa39531 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -346,6 +346,9 @@ private: /// Access a bindless image sampler. ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + /// Recursive Iteration over the OperationNode operands, searching for GprNodes. + void SearchOperands(NodeBlock& nb, Node var); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index ce8fcfe0a..b23424523 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -679,7 +679,7 @@ u32 CalculateLayerSize(const ImageInfo& info) noexcept { } std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept { - ASSERT(info.resources.levels <= MAX_MIP_LEVELS); + ASSERT(info.resources.levels <= static_cast<s32>(MAX_MIP_LEVELS)); const LevelInfo level_info = MakeLevelInfo(info); std::array<u32, MAX_MIP_LEVELS> offsets{}; u32 offset = 0; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 8d10ac29e..7a9d00d4f 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -12,21 +12,12 @@ #include <fmt/format.h> -#define VK_NO_PROTOTYPES -#include <vulkan/vulkan.h> - -#include <GFSDK_Aftermath.h> -#include <GFSDK_Aftermath_Defines.h> -#include <GFSDK_Aftermath_GpuCrashDump.h> -#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h> - #include "common/common_paths.h" #include "common/common_types.h" #include "common/file_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" - -#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" +#include "video_core/vulkan_common/nsight_aftermath_tracker.h" namespace Vulkan { @@ -53,7 +44,7 @@ NsightAftermathTracker::NsightAftermathTracker() { !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON", &GFSDK_Aftermath_GpuCrashDump_GetJSON)) { LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); - return false; + return; } dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index cee3847fb..1ce8d4e8e 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -8,8 +8,9 @@ #include <string> #include <vector> -#define VK_NO_PROTOTYPES -#include <vulkan/vulkan.h> +#include "common/common_types.h" +#include "common/dynamic_library.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" #ifdef HAS_NSIGHT_AFTERMATH #include <GFSDK_Aftermath_Defines.h> @@ -17,9 +18,6 @@ #include <GFSDK_Aftermath_GpuCrashDumpDecoding.h> #endif -#include "common/common_types.h" -#include "common/dynamic_library.h" - namespace Vulkan { class NsightAftermathTracker { diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h index 2efcd244c..b0519f132 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.h +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 37d7b45a3..5b4209c72 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -84,21 +84,6 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType } } -[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) { - static constexpr std::array RDNA_DEVICES{ - "5700", - "5600", - "5500", - "5300", - }; - if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { - return false; - } - return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) { - return device_name.find(name) != std::string_view::npos; - }); -} - std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::PhysicalDevice physical) { static constexpr std::array formats{ VK_FORMAT_A8B8G8R8_UNORM_PACK32, @@ -436,14 +421,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); ext_extended_dynamic_state = false; } - if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { - // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it - // seems to cause stability issues - LOG_WARNING( - Render_Vulkan, - "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state"); - ext_extended_dynamic_state = false; - } graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp new file mode 100644 index 000000000..d6eb3af31 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -0,0 +1,268 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <bit> +#include <optional> +#include <vector> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +struct Range { + u64 begin; + u64 end; + + [[nodiscard]] bool Contains(u64 iterator, u64 size) const noexcept { + return iterator < end && begin < iterator + size; + } +}; + +[[nodiscard]] u64 AllocationChunkSize(u64 required_size) { + static constexpr std::array sizes{ + 0x1000ULL << 10, 0x1400ULL << 10, 0x1800ULL << 10, 0x1c00ULL << 10, 0x2000ULL << 10, + 0x3200ULL << 10, 0x4000ULL << 10, 0x6000ULL << 10, 0x8000ULL << 10, 0xA000ULL << 10, + 0x10000ULL << 10, 0x18000ULL << 10, 0x20000ULL << 10, + }; + static_assert(std::is_sorted(sizes.begin(), sizes.end())); + + const auto it = std::ranges::lower_bound(sizes, required_size); + return it != sizes.end() ? *it : Common::AlignUp(required_size, 4ULL << 20); +} + +[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + case MemoryUsage::Upload: + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + case MemoryUsage::Download: + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + UNREACHABLE_MSG("Invalid memory usage={}", usage); + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +} +} // Anonymous namespace + +class MemoryAllocation { +public: + explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_, + VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) + : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, + property_flags{properties}, shifted_memory_type{1U << type} {} + + [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { + const std::optional<u64> alloc = FindFreeRegion(size, alignment); + if (!alloc) { + // Signal out of memory, it'll try to do more allocations. + return std::nullopt; + } + const Range range{ + .begin = *alloc, + .end = *alloc + size, + }; + commits.insert(std::ranges::upper_bound(commits, *alloc, {}, &Range::begin), range); + return std::make_optional<MemoryCommit>(this, *memory, *alloc, *alloc + size); + } + + void Free(u64 begin) { + const auto it = std::ranges::find(commits, begin, &Range::begin); + ASSERT_MSG(it != commits.end(), "Invalid commit"); + commits.erase(it); + } + + [[nodiscard]] std::span<u8> Map() { + if (memory_mapped_span.empty()) { + u8* const raw_pointer = memory.Map(0, allocation_size); + memory_mapped_span = std::span<u8>(raw_pointer, allocation_size); + } + return memory_mapped_span; + } + + /// Returns whether this allocation is compatible with the arguments. + [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { + return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; + } + +private: + [[nodiscard]] static constexpr u32 ShiftType(u32 type) { + return 1U << type; + } + + [[nodiscard]] std::optional<u64> FindFreeRegion(u64 size, u64 alignment) noexcept { + ASSERT(std::has_single_bit(alignment)); + const u64 alignment_log2 = std::countr_zero(alignment); + std::optional<u64> candidate; + u64 iterator = 0; + auto commit = commits.begin(); + while (iterator + size <= allocation_size) { + candidate = candidate.value_or(iterator); + if (commit == commits.end()) { + break; + } + if (commit->Contains(*candidate, size)) { + candidate = std::nullopt; + } + iterator = Common::AlignUpLog2(commit->end, alignment_log2); + ++commit; + } + return candidate; + } + + const Device& device; ///< Vulkan device. + const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. + const u64 allocation_size; ///< Size of this allocation. + const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. + const u32 shifted_memory_type; ///< Shifted Vulkan memory type. + std::vector<Range> commits; ///< All commit ranges done from this allocation. + std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. +}; + +MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, + u64 end_) noexcept + : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {} + +MemoryCommit::~MemoryCommit() { + Release(); +} + +MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept { + Release(); + allocation = std::exchange(rhs.allocation, nullptr); + memory = rhs.memory; + begin = rhs.begin; + end = rhs.end; + span = std::exchange(rhs.span, std::span<u8>{}); + return *this; +} + +MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept + : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin}, + end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {} + +std::span<u8> MemoryCommit::Map() { + if (span.empty()) { + span = allocation->Map().subspan(begin, end - begin); + } + return span; +} + +void MemoryCommit::Release() { + if (allocation) { + allocation->Free(begin); + } +} + +MemoryAllocator::MemoryAllocator(const Device& device_) + : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} + +MemoryAllocator::~MemoryAllocator() = default; + +MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { + // Find the fastest memory flags we can afford with the current requirements + const VkMemoryPropertyFlags flags = MemoryPropertyFlags(requirements.memoryTypeBits, usage); + if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) { + return std::move(*commit); + } + // Commit has failed, allocate more memory. + // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory. + AllocMemory(flags, requirements.memoryTypeBits, AllocationChunkSize(requirements.size)); + + // Commit again, this time it won't fail since there's a fresh allocation above. + // If it does, there's a bug. + return TryCommit(requirements, flags).value(); +} + +MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) { + auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage); + buffer.BindMemory(commit.Memory(), commit.Offset()); + return commit; +} + +MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) { + auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage); + image.BindMemory(commit.Memory(), commit.Offset()); + return commit; +} + +void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { + const u32 type = FindType(flags, type_mask).value(); + vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = size, + .memoryTypeIndex = type, + }); + allocations.push_back( + std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type)); +} + +std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags flags) { + for (auto& allocation : allocations) { + if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) { + continue; + } + if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { + return commit; + } + } + return std::nullopt; +} + +VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const { + return MemoryPropertyFlags(type_mask, MemoryUsagePropertyFlags(usage)); +} + +VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, + VkMemoryPropertyFlags flags) const { + if (FindType(flags, type_mask)) { + // Found a memory type with those requirements + return flags; + } + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { + // Remove host cached bit in case it's not supported + return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + } + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { + // Remove device local, if it's not supported by the requested resource + return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + UNREACHABLE_MSG("No compatible memory types found"); + return 0; +} + +std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const { + for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { + const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags; + if ((type_mask & (1U << type_index)) && (type_flags & flags)) { + // The type matches in type and in the wanted properties. + return type_index; + } + } + // Failed to find index + return std::nullopt; +} + +bool IsHostVisible(MemoryUsage usage) noexcept { + switch (usage) { + case MemoryUsage::DeviceLocal: + return false; + case MemoryUsage::Upload: + case MemoryUsage::Download: + return true; + } + UNREACHABLE_MSG("Invalid memory usage={}", usage); + return false; +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h new file mode 100644 index 000000000..9e6cfabf9 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -0,0 +1,117 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <span> +#include <utility> +#include <vector> +#include "common/common_types.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class MemoryMap; +class MemoryAllocation; + +/// Hints and requirements for the backing memory type of a commit +enum class MemoryUsage { + DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU + Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads + Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks +}; + +/// Ownership handle of a memory commitment. +/// Points to a subregion of a memory allocation. +class MemoryCommit { +public: + explicit MemoryCommit() noexcept = default; + explicit MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, + u64 end_) noexcept; + ~MemoryCommit(); + + MemoryCommit& operator=(MemoryCommit&&) noexcept; + MemoryCommit(MemoryCommit&&) noexcept; + + MemoryCommit& operator=(const MemoryCommit&) = delete; + MemoryCommit(const MemoryCommit&) = delete; + + /// Returns a host visible memory map. + /// It will map the backing allocation if it hasn't been mapped before. + std::span<u8> Map(); + + /// Returns the Vulkan memory handler. + VkDeviceMemory Memory() const { + return memory; + } + + /// Returns the start position of the commit relative to the allocation. + VkDeviceSize Offset() const { + return static_cast<VkDeviceSize>(begin); + } + +private: + void Release(); + + MemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. + VkDeviceMemory memory{}; ///< Vulkan device memory handler. + u64 begin{}; ///< Beginning offset in bytes to where the commit exists. + u64 end{}; ///< Offset in bytes where the commit ends. + std::span<u8> span; ///< Host visible memory span. Empty if not queried before. +}; + +/// Memory allocator container. +/// Allocates and releases memory allocations on demand. +class MemoryAllocator { +public: + explicit MemoryAllocator(const Device& device_); + ~MemoryAllocator(); + + MemoryAllocator& operator=(const MemoryAllocator&) = delete; + MemoryAllocator(const MemoryAllocator&) = delete; + + /** + * Commits a memory with the specified requirements. + * + * @param requirements Requirements returned from a Vulkan call. + * @param usage Indicates how the memory will be used. + * + * @returns A memory commit. + */ + MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage); + + /// Commits memory required by the buffer and binds it. + MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage); + + /// Commits memory required by the image and binds it. + MemoryCommit Commit(const vk::Image& image, MemoryUsage usage); + +private: + /// Allocates a chunk of memory. + void AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); + + /// Tries to allocate a memory commit. + std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags flags); + + /// Returns the fastest compatible memory property flags from a wanted usage. + VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const; + + /// Returns the fastest compatible memory property flags from the wanted flags. + VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const; + + /// Returns index to the fastest memory type compatible with the passed requirements. + std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; + + const Device& device; ///< Device handle. + const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. + std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. +}; + +/// Returns true when a memory usage is guaranteed to be host visible. +bool IsHostVisible(MemoryUsage usage) noexcept; + +} // namespace Vulkan |