diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 4 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 12 | ||||
-rw-r--r-- | src/video_core/gpu.h | 2 | ||||
-rw-r--r-- | src/video_core/rasterizer_accelerated.cpp | 6 | ||||
-rw-r--r-- | src/video_core/rasterizer_accelerated.h | 12 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 67 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_memory_allocator.h | 7 |
7 files changed, 67 insertions, 43 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index de971041f..9e6b87960 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -596,7 +596,7 @@ void BufferCache<P>::PopAsyncFlushes() { runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); } runtime.Finish(); - for (const auto [copy, buffer_id] : downloads) { + for (const auto& [copy, buffer_id] : downloads) { const Buffer& buffer = slot_buffers[buffer_id]; const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; // Undo the modified offset @@ -606,7 +606,7 @@ void BufferCache<P>::PopAsyncFlushes() { } } else { const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); - for (const auto [copy, buffer_id] : downloads) { + for (const auto& [copy, buffer_id] : downloads) { Buffer& buffer = slot_buffers[buffer_id]; buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 37f7b24e1..35cc561be 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -104,7 +104,13 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) { } MICROPROFILE_SCOPE(GPU_wait); std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; }); + sync_cv.wait(lock, [=, this] { + if (shutting_down.load(std::memory_order_relaxed)) { + // We're shutting down, ensure no threads continue to wait for the next syncpoint + return true; + } + return syncpoints.at(syncpoint_id).load() >= value; + }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { @@ -523,6 +529,10 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { } void GPU::ShutDown() { + // Signal that threads should no longer block on syncpoint fences + shutting_down.store(true, std::memory_order_relaxed); + sync_cv.notify_all(); + gpu_thread.ShutDown(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29a867863..a8e98e51b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -389,6 +389,8 @@ private: std::unique_ptr<Engines::KeplerMemory> kepler_memory; /// Shader build notifier std::unique_ptr<VideoCore::ShaderNotify> shader_notify; + /// When true, we are about to shut down emulation session, so terminate outstanding tasks + std::atomic_bool shutting_down{}; std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 62d84c0f8..6decd2546 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -18,10 +18,10 @@ RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE); for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { - auto& count = cached_pages.at(page >> 3).Count(page); + auto& count = cached_pages.at(page >> 2).Count(page); if (delta > 0) { - ASSERT_MSG(count < UINT8_MAX, "Count may overflow!"); + ASSERT_MSG(count < UINT16_MAX, "Count may overflow!"); } else if (delta < 0) { ASSERT_MSG(count > 0, "Count may underflow!"); } else { @@ -29,7 +29,7 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del } // Adds or subtracts 1, as count is a unsigned 8-bit value - count += static_cast<u8>(delta); + count += static_cast<u16>(delta); // Assume delta is either -1 or 1 if (count == 0) { diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 9227a4adc..ea879bfdd 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -29,20 +29,20 @@ private: public: CacheEntry() = default; - std::atomic_uint8_t& Count(std::size_t page) { - return values[page & 7]; + std::atomic_uint16_t& Count(std::size_t page) { + return values[page & 3]; } - const std::atomic_uint8_t& Count(std::size_t page) const { - return values[page & 7]; + const std::atomic_uint16_t& Count(std::size_t page) const { + return values[page & 3]; } private: - std::array<std::atomic_uint8_t, 8> values{}; + std::array<std::atomic_uint16_t, 4> values{}; }; static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); - std::array<CacheEntry, 0x800000> cached_pages; + std::array<CacheEntry, 0x1000000> cached_pages; Core::Memory::Memory& cpu_memory; }; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index fa37aa79a..5edd06ebc 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -53,6 +53,18 @@ struct Range { UNREACHABLE_MSG("Invalid memory usage={}", usage); return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } + +constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{ + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, +#ifdef _WIN32 + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, +#elif __unix__ + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, +#else + .handleTypes = 0, +#endif +}; } // Anonymous namespace class MemoryAllocation { @@ -131,7 +143,7 @@ public: /// Returns whether this allocation is compatible with the arguments. [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { - return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; + return (flags & property_flags) == property_flags && (type_mask & shifted_memory_type) != 0; } private: @@ -217,14 +229,18 @@ MemoryAllocator::~MemoryAllocator() = default; MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { // Find the fastest memory flags we can afford with the current requirements - const VkMemoryPropertyFlags flags = MemoryPropertyFlags(requirements.memoryTypeBits, usage); + const u32 type_mask = requirements.memoryTypeBits; + const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage); + const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags); if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) { return std::move(*commit); } // Commit has failed, allocate more memory. - // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory. - AllocMemory(flags, requirements.memoryTypeBits, AllocationChunkSize(requirements.size)); - + const u64 chunk_size = AllocationChunkSize(requirements.size); + if (!TryAllocMemory(flags, type_mask, chunk_size)) { + // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory. + throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); + } // Commit again, this time it won't fail since there's a fresh allocation above. // If it does, there's a bug. return TryCommit(requirements, flags).value(); @@ -242,26 +258,25 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) return commit; } -void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { +bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { const u32 type = FindType(flags, type_mask).value(); - const VkExportMemoryAllocateInfo export_allocate_info{ - .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, - .pNext = nullptr, -#ifdef _WIN32 - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, -#elif __unix__ - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, -#else - .handleTypes = 0, -#endif - }; - vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = export_allocations ? &export_allocate_info : nullptr, + .pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr, .allocationSize = size, .memoryTypeIndex = type, }); + if (!memory) { + if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + // Try to allocate non device local memory + return TryAllocMemory(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, type_mask, size); + } else { + // RIP + return false; + } + } allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type)); + return true; } std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, @@ -274,24 +289,24 @@ std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirement return commit; } } + if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + // Look for non device local commits on failure + return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } return std::nullopt; } -VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const { - return MemoryPropertyFlags(type_mask, MemoryUsagePropertyFlags(usage)); -} - VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const { if (FindType(flags, type_mask)) { // Found a memory type with those requirements return flags; } - if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { + if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) { // Remove host cached bit in case it's not supported return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT); } - if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { + if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { // Remove device local, if it's not supported by the requested resource return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } @@ -302,7 +317,7 @@ VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const { for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags; - if ((type_mask & (1U << type_index)) && (type_flags & flags)) { + if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) { // The type matches in type and in the wanted properties. return type_index; } diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index d1ce29450..db12d02f4 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -101,16 +101,13 @@ public: MemoryCommit Commit(const vk::Image& image, MemoryUsage usage); private: - /// Allocates a chunk of memory. - void AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); + /// Tries to allocate a chunk of memory. + bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); /// Tries to allocate a memory commit. std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements, VkMemoryPropertyFlags flags); - /// Returns the fastest compatible memory property flags from a wanted usage. - VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const; - /// Returns the fastest compatible memory property flags from the wanted flags. VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const; |