summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h4
-rw-r--r--src/video_core/gpu.cpp12
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/rasterizer_accelerated.cpp6
-rw-r--r--src/video_core/rasterizer_accelerated.h12
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp67
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h7
7 files changed, 67 insertions, 43 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index de971041f..9e6b87960 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -596,7 +596,7 @@ void BufferCache<P>::PopAsyncFlushes() {
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
}
runtime.Finish();
- for (const auto [copy, buffer_id] : downloads) {
+ for (const auto& [copy, buffer_id] : downloads) {
const Buffer& buffer = slot_buffers[buffer_id];
const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
// Undo the modified offset
@@ -606,7 +606,7 @@ void BufferCache<P>::PopAsyncFlushes() {
}
} else {
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
- for (const auto [copy, buffer_id] : downloads) {
+ for (const auto& [copy, buffer_id] : downloads) {
Buffer& buffer = slot_buffers[buffer_id];
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 37f7b24e1..35cc561be 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -104,7 +104,13 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; });
+ sync_cv.wait(lock, [=, this] {
+ if (shutting_down.load(std::memory_order_relaxed)) {
+ // We're shutting down, ensure no threads continue to wait for the next syncpoint
+ return true;
+ }
+ return syncpoints.at(syncpoint_id).load() >= value;
+ });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
@@ -523,6 +529,10 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
}
void GPU::ShutDown() {
+ // Signal that threads should no longer block on syncpoint fences
+ shutting_down.store(true, std::memory_order_relaxed);
+ sync_cv.notify_all();
+
gpu_thread.ShutDown();
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29a867863..a8e98e51b 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -389,6 +389,8 @@ private:
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
/// Shader build notifier
std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
+ /// When true, we are about to shut down emulation session, so terminate outstanding tasks
+ std::atomic_bool shutting_down{};
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
index 62d84c0f8..6decd2546 100644
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -18,10 +18,10 @@ RasterizerAccelerated::~RasterizerAccelerated() = default;
void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE);
for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) {
- auto& count = cached_pages.at(page >> 3).Count(page);
+ auto& count = cached_pages.at(page >> 2).Count(page);
if (delta > 0) {
- ASSERT_MSG(count < UINT8_MAX, "Count may overflow!");
+ ASSERT_MSG(count < UINT16_MAX, "Count may overflow!");
} else if (delta < 0) {
ASSERT_MSG(count > 0, "Count may underflow!");
} else {
@@ -29,7 +29,7 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del
}
// Adds or subtracts 1, as count is a unsigned 8-bit value
- count += static_cast<u8>(delta);
+ count += static_cast<u16>(delta);
// Assume delta is either -1 or 1
if (count == 0) {
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
index 9227a4adc..ea879bfdd 100644
--- a/src/video_core/rasterizer_accelerated.h
+++ b/src/video_core/rasterizer_accelerated.h
@@ -29,20 +29,20 @@ private:
public:
CacheEntry() = default;
- std::atomic_uint8_t& Count(std::size_t page) {
- return values[page & 7];
+ std::atomic_uint16_t& Count(std::size_t page) {
+ return values[page & 3];
}
- const std::atomic_uint8_t& Count(std::size_t page) const {
- return values[page & 7];
+ const std::atomic_uint16_t& Count(std::size_t page) const {
+ return values[page & 3];
}
private:
- std::array<std::atomic_uint8_t, 8> values{};
+ std::array<std::atomic_uint16_t, 4> values{};
};
static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
- std::array<CacheEntry, 0x800000> cached_pages;
+ std::array<CacheEntry, 0x1000000> cached_pages;
Core::Memory::Memory& cpu_memory;
};
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index fa37aa79a..5edd06ebc 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -53,6 +53,18 @@ struct Range {
UNREACHABLE_MSG("Invalid memory usage={}", usage);
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
+
+constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+#ifdef _WIN32
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+#elif __unix__
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+#else
+ .handleTypes = 0,
+#endif
+};
} // Anonymous namespace
class MemoryAllocation {
@@ -131,7 +143,7 @@ public:
/// Returns whether this allocation is compatible with the arguments.
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
- return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
+ return (flags & property_flags) == property_flags && (type_mask & shifted_memory_type) != 0;
}
private:
@@ -217,14 +229,18 @@ MemoryAllocator::~MemoryAllocator() = default;
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
// Find the fastest memory flags we can afford with the current requirements
- const VkMemoryPropertyFlags flags = MemoryPropertyFlags(requirements.memoryTypeBits, usage);
+ const u32 type_mask = requirements.memoryTypeBits;
+ const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
+ const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
return std::move(*commit);
}
// Commit has failed, allocate more memory.
- // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
- AllocMemory(flags, requirements.memoryTypeBits, AllocationChunkSize(requirements.size));
-
+ const u64 chunk_size = AllocationChunkSize(requirements.size);
+ if (!TryAllocMemory(flags, type_mask, chunk_size)) {
+ // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
+ throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
// Commit again, this time it won't fail since there's a fresh allocation above.
// If it does, there's a bug.
return TryCommit(requirements, flags).value();
@@ -242,26 +258,25 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage)
return commit;
}
-void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
+bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const u32 type = FindType(flags, type_mask).value();
- const VkExportMemoryAllocateInfo export_allocate_info{
- .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
- .pNext = nullptr,
-#ifdef _WIN32
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
-#elif __unix__
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
-#else
- .handleTypes = 0,
-#endif
- };
- vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
+ vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = export_allocations ? &export_allocate_info : nullptr,
+ .pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr,
.allocationSize = size,
.memoryTypeIndex = type,
});
+ if (!memory) {
+ if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
+ // Try to allocate non device local memory
+ return TryAllocMemory(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, type_mask, size);
+ } else {
+ // RIP
+ return false;
+ }
+ }
allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
+ return true;
}
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
@@ -274,24 +289,24 @@ std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirement
return commit;
}
}
+ if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
+ // Look for non device local commits on failure
+ return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ }
return std::nullopt;
}
-VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const {
- return MemoryPropertyFlags(type_mask, MemoryUsagePropertyFlags(usage));
-}
-
VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
VkMemoryPropertyFlags flags) const {
if (FindType(flags, type_mask)) {
// Found a memory type with those requirements
return flags;
}
- if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+ if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
// Remove host cached bit in case it's not supported
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
}
- if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+ if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
// Remove device local, if it's not supported by the requested resource
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
@@ -302,7 +317,7 @@ VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
- if ((type_mask & (1U << type_index)) && (type_flags & flags)) {
+ if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
// The type matches in type and in the wanted properties.
return type_index;
}
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index d1ce29450..db12d02f4 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -101,16 +101,13 @@ public:
MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
private:
- /// Allocates a chunk of memory.
- void AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
+ /// Tries to allocate a chunk of memory.
+ bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
/// Tries to allocate a memory commit.
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags);
- /// Returns the fastest compatible memory property flags from a wanted usage.
- VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const;
-
/// Returns the fastest compatible memory property flags from the wanted flags.
VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const;