summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp20
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp34
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp18
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp86
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_smaa.cpp39
-rw-r--r--src/video_core/renderer_vulkan/vk_smaa.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp115
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp54
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.cpp10
24 files changed, 215 insertions, 266 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index cf2964a3f..28d4b15a0 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -495,6 +495,9 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
+ if (!device.IsExtShaderStencilExportSupported()) {
+ return;
+ }
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
const BlitImagePipelineKey key{
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 9a0b10568..a8540339d 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -259,6 +259,26 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
break;
}
}
+ // Transcode on hardware that doesn't support BCn natively
+ if (!device.IsOptimalBcnSupported() && VideoCore::Surface::IsPixelFormatBCn(pixel_format)) {
+ const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
+ if (pixel_format == PixelFormat::BC4_SNORM) {
+ tuple.format = VK_FORMAT_R8_SNORM;
+ } else if (pixel_format == PixelFormat::BC4_UNORM) {
+ tuple.format = VK_FORMAT_R8_UNORM;
+ } else if (pixel_format == PixelFormat::BC5_SNORM) {
+ tuple.format = VK_FORMAT_R8G8_SNORM;
+ } else if (pixel_format == PixelFormat::BC5_UNORM) {
+ tuple.format = VK_FORMAT_R8G8_UNORM;
+ } else if (pixel_format == PixelFormat::BC6H_SFLOAT ||
+ pixel_format == PixelFormat::BC6H_UFLOAT) {
+ tuple.format = VK_FORMAT_R16G16B16A16_SFLOAT;
+ } else if (is_srgb) {
+ tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
+ } else {
+ tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ }
+ }
const bool attachable = (tuple.usage & Attachable) != 0;
const bool storage = (tuple.usage & Storage) != 0;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 77128c6e2..454bb66a4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -12,6 +12,7 @@
#include <fmt/format.h>
#include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/telemetry.h"
@@ -65,6 +66,21 @@ std::string BuildCommaSeparatedExtensions(
return fmt::format("{}", fmt::join(available_extensions, ","));
}
+DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) {
+ if (!Settings::values.renderer_debug) {
+ return DebugCallback{};
+ }
+ const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
+ const auto it = std::ranges::find_if(*properties, [](const auto& prop) {
+ return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0;
+ });
+ if (it != properties->end()) {
+ return CreateDebugUtilsCallback(instance);
+ } else {
+ return CreateDebugReportCallback(instance);
+ }
+}
+
} // Anonymous namespace
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
@@ -87,10 +103,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
Settings::values.renderer_debug.GetValue())),
- debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
+ debug_callback(MakeDebugCallback(instance, dld)),
surface(CreateSurface(instance, render_window.GetWindowInfo())),
- device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
- state_tracker(), scheduler(device, state_tracker),
+ device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
+ scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
@@ -173,7 +189,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
- vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
+ vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -196,7 +212,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -234,8 +249,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
- const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
- MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
+ const vk::Buffer dst_buffer =
+ memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
@@ -309,8 +324,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
- const auto dst_memory_map = dst_buffer_memory.Map();
- std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
+ dst_buffer.Invalidate();
+ std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
+ dst_buffer.Mapped().size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index b2e8cbd1b..ca22c0baa 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -5,6 +5,7 @@
#include <memory>
#include <string>
+#include <variant>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
@@ -33,6 +34,8 @@ class GPU;
namespace Vulkan {
+using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>;
+
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface);
@@ -71,7 +74,7 @@ private:
vk::InstanceDispatch dld;
vk::Instance instance;
- vk::DebugUtilsMessenger debug_callback;
+ DebugCallback debug_callback;
vk::SurfaceKHR surface;
ScreenInfo screen_info;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index acb143fc7..ad3b29f0e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -162,7 +162,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
SetUniformData(data, layout);
SetVertexData(data, framebuffer, layout);
- const std::span<u8> mapped_span = buffer_commit.Map();
+ const std::span<u8> mapped_span = buffer.Mapped();
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
@@ -1071,14 +1071,9 @@ void BlitScreen::ReleaseRawImages() {
scheduler.Wait(tick);
}
raw_images.clear();
- raw_buffer_commits.clear();
-
aa_image_view.reset();
aa_image.reset();
- aa_commit = MemoryCommit{};
-
buffer.reset();
- buffer_commit = MemoryCommit{};
}
void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@@ -1094,20 +1089,18 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer
.pQueueFamilyIndices = nullptr,
};
- buffer = device.GetLogical().CreateBuffer(ci);
- buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
+ buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload);
}
void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
- raw_buffer_commits.resize(image_count);
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
u32 down_shift = 0) {
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- return device.GetLogical().CreateImage(VkImageCreateInfo{
+ return memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -1130,9 +1123,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
};
- const auto create_commit = [&](vk::Image& image) {
- return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
- };
const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -1161,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = create_image();
- raw_buffer_commits[i] = create_commit(raw_images[i]);
raw_image_views[i] = create_image_view(raw_images[i]);
}
@@ -1169,7 +1158,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
- aa_commit = create_commit(aa_image);
aa_image_view = create_image_view(aa_image, true);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 68ec20253..8365b5668 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -142,13 +142,11 @@ private:
vk::Sampler sampler;
vk::Buffer buffer;
- MemoryCommit buffer_commit;
std::vector<u64> resource_ticks;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
- std::vector<MemoryCommit> raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
@@ -159,7 +157,6 @@ private:
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
- MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index e30fcb1ed..b72f95235 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -50,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
}
}
-vk::Buffer CreateBuffer(const Device& device, u64 size) {
+vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
@@ -60,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
- return device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -69,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
} // Anonymous namespace
@@ -79,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
- device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
- commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
+ device{&runtime.device}, buffer{
+ CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@@ -138,7 +139,7 @@ public:
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
- buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -147,14 +148,21 @@ public:
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT("Quad LUT");
}
- memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
- const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
- u8* staging_data = staging.mapped_span.data();
+ const bool host_visible = buffer.IsHostVisible();
+ const StagingBufferRef staging = [&] {
+ if (host_visible) {
+ return StagingBufferRef{};
+ }
+ return staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ }();
+
+ u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@@ -164,29 +172,33 @@ public:
}
}
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
- dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
- const VkBufferCopy copy{
- .srcOffset = src_offset,
- .dstOffset = 0,
- .size = size_bytes,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = dst_buffer,
- .offset = 0,
- .size = size_bytes,
- };
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
- });
+ if (!host_visible) {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+ dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = src_offset,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+ });
+ } else {
+ buffer.Flush();
+ }
}
void BindBuffer(u32 first) {
@@ -361,7 +373,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
- boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
@@ -578,7 +590,8 @@ void BufferCacheRuntime::ReserveNullBuffer() {
.pNext = nullptr,
.flags = 0,
.size = 4,
- .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -587,11 +600,10 @@ void BufferCacheRuntime::ReserveNullBuffer() {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
- null_buffer = device.GetLogical().CreateBuffer(create_info);
+ null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
}
- null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index cdeef8846..95446c732 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -48,7 +48,6 @@ private:
const Device* device{};
vk::Buffer buffer;
- MemoryCommit commit;
std::vector<BufferView> views;
};
@@ -142,7 +141,6 @@ private:
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
- MemoryCommit null_buffer_commit;
std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index df972cd54..9bcdca2fb 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -205,10 +205,9 @@ void FSR::CreateDescriptorSets() {
void FSR::CreateImages() {
images.resize(image_count * 2);
image_views.resize(image_count * 2);
- buffer_commits.resize(image_count * 2);
for (size_t i = 0; i < image_count * 2; ++i) {
- images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+ images[i] = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -231,7 +230,6 @@ void FSR::CreateImages() {
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 5d872861f..8bb9fc23a 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -47,7 +47,6 @@ private:
vk::Sampler sampler;
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
- std::vector<MemoryCommit> buffer_commits;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index c1595642e..ad35cacac 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -652,13 +652,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE,
};
+ const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
VkPipelineViewportStateCreateInfo viewport_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .viewportCount = Maxwell::NumViewports,
+ .viewportCount = num_viewports,
.pViewports = nullptr,
- .scissorCount = Maxwell::NumViewports,
+ .scissorCount = num_viewports,
.pScissors = nullptr,
};
if (device.IsNvViewportSwizzleSupported()) {
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 5eeda08d2..6b288b994 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -75,15 +75,9 @@ void MasterSemaphore::Refresh() {
void MasterSemaphore::Wait(u64 tick) {
if (!semaphore) {
- // If we don't support timeline semaphores, use an atomic wait
- while (true) {
- u64 current_value = gpu_tick.load(std::memory_order_relaxed);
- if (current_value >= tick) {
- return;
- }
- gpu_tick.wait(current_value);
- }
-
+ // If we don't support timeline semaphores, wait for the value normally
+ std::unique_lock lk{free_mutex};
+ free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; });
return;
}
@@ -198,11 +192,13 @@ void MasterSemaphore::WaitThread(std::stop_token token) {
fence.Wait();
fence.Reset();
- gpu_tick.store(host_tick);
- gpu_tick.notify_all();
- std::scoped_lock lock{free_mutex};
- free_queue.push_front(std::move(fence));
+ {
+ std::scoped_lock lock{free_mutex};
+ free_queue.push_front(std::move(fence));
+ gpu_tick.store(host_tick);
+ }
+ free_cv.notify_one();
}
}
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 1e7c90215..3f599d7bd 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -72,6 +72,7 @@ private:
std::atomic<u64> current_tick{1}; ///< Current logical tick.
std::mutex wait_mutex;
std::mutex free_mutex;
+ std::condition_variable free_cv;
std::condition_variable_any wait_cv;
std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread.
std::deque<vk::Fence> free_queue; ///< Holds available fences for submission.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 18e040a1b..d600c4e61 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
}
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
break;
@@ -303,7 +309,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
- .support_float_controls = true,
+ .support_float_controls = device.IsKhrShaderFloatControlsSupported(),
.support_separate_denorm_behavior =
float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_separate_rounding_mode =
@@ -319,12 +325,13 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
- .support_vote = true,
+ .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
.support_viewport_index_layer_non_geometry =
device.IsExtShaderViewportIndexLayerSupported(),
.support_viewport_mask = device.IsNvViewportArray2Supported(),
.support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
- .support_demote_to_helper_invocation = true,
+ .support_demote_to_helper_invocation =
+ device.IsExtShaderDemoteToHelperInvocationSupported(),
.support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
.support_derivative_control = true,
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
@@ -705,10 +712,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
- // TODO: Remove this when Intel fixes their shader compiler.
- // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
- if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS &&
- !Settings::values.enable_compute_pipelines.GetValue()) {
+ if (device.HasBrokenCompute()) {
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
return nullptr;
}
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index 10ace0420..d681bd22a 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -181,7 +181,7 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
frame->height = height;
frame->is_srgb = is_srgb;
- frame->image = dld.CreateImage({
+ frame->image = memory_allocator.CreateImage({
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -204,8 +204,6 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
-
frame->image_view = dld.CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
index 4ac2e2395..83e859416 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.h
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -29,7 +29,6 @@ struct Frame {
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
- MemoryCommit image_commit;
vk::CommandBuffer cmdbuf;
vk::Semaphore render_ready;
vk::Fence present_done;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 84e3a30cc..f7c0d939a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -315,7 +315,14 @@ void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork();
gpu_memory->FlushCaching();
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache.UpdateCounters();
+ }
+#else
query_cache.UpdateCounters();
+#endif
auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
@@ -925,7 +932,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
}
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
- const std::array viewports{
+ const std::array viewport_list{
GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale),
GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale),
GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale),
@@ -935,7 +942,11 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
};
- scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
+ scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) {
+ const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
+ const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports);
+ cmdbuf.SetViewport(0, viewports);
+ });
}
void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -948,7 +959,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
- const std::array scissors{
+ const std::array scissor_list{
GetScissorState(regs, 0, up_scale, down_shift),
GetScissorState(regs, 1, up_scale, down_shift),
GetScissorState(regs, 2, up_scale, down_shift),
@@ -966,7 +977,11 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
GetScissorState(regs, 14, up_scale, down_shift),
GetScissorState(regs, 15, up_scale, down_shift),
};
- scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
+ scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) {
+ const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
+ const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors);
+ cmdbuf.SetScissor(0, scissors);
+ });
}
void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index f8735189d..5efd7d66e 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -25,9 +25,7 @@ namespace {
#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0])))
-std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
- MemoryAllocator& allocator,
- VkExtent2D dimensions, VkFormat format) {
+vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) {
const VkImageCreateInfo image_ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
@@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
-
- auto image = device.GetLogical().CreateImage(image_ci);
- auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal);
-
- return std::make_pair(std::move(image), std::move(commit));
+ return allocator.CreateImage(image_ci);
}
void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
@@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
vk::Image& image, VkExtent2D dimensions, VkFormat format,
std::span<const u8> initial_contents = {}) {
- auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo upload_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload);
- std::ranges::copy(initial_contents, upload_commit.Map().begin());
+ };
+ auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload);
+ std::ranges::copy(initial_contents, upload_buffer.Mapped().begin());
+ upload_buffer.Flush();
const std::array<VkBufferImageCopy, 1> regions{{{
.bufferOffset = 0,
@@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
});
scheduler.Finish();
-
- // This should go out of scope before the commit
- auto upload_buffer2 = std::move(upload_buffer);
}
vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) {
@@ -531,10 +523,8 @@ void SMAA::CreateImages() {
static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
- std::tie(m_static_images[Area], m_static_buffer_commits[Area]) =
- CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
- std::tie(m_static_images[Search], m_static_buffer_commits[Search]) =
- CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM);
+ m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
+ m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_image_views[Area] =
CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
@@ -544,12 +534,11 @@ void SMAA::CreateImages() {
for (u32 i = 0; i < m_image_count; i++) {
Images& images = m_dynamic_images.emplace_back();
- std::tie(images.images[Blend], images.buffer_commits[Blend]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
- std::tie(images.images[Edges], images.buffer_commits[Edges]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
- std::tie(images.images[Output], images.buffer_commits[Output]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+ images.images[Blend] =
+ CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+ images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
+ images.images[Output] =
+ CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Blend] =
CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);
diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h
index 99a369148..0e214258a 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.h
+++ b/src/video_core/renderer_vulkan/vk_smaa.h
@@ -66,13 +66,11 @@ private:
std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{};
std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{};
- std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits;
std::array<vk::Image, MaxStaticImage> m_static_images{};
std::array<vk::ImageView, MaxStaticImage> m_static_image_views{};
struct Images {
vk::DescriptorSets descriptor_sets{};
- std::array<MemoryCommit, MaxDynamicImage> buffer_commits;
std::array<vk::Image, MaxDynamicImage> images{};
std::array<vk::ImageView, MaxDynamicImage> image_views{};
std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{};
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 74ca77216..ce92f66ab 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
-constexpr VkMemoryPropertyFlags HOST_FLAGS =
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
-
-bool IsStreamHeap(VkMemoryHeap heap) noexcept {
- return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
-}
-
-std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- VkMemoryPropertyFlags flags) noexcept {
- for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
- if (((type_mask >> type_index) & 1) == 0) {
- // Memory type is incompatible
- continue;
- }
- const VkMemoryType& memory_type = props.memoryTypes[type_index];
- if ((memory_type.propertyFlags & flags) != flags) {
- // Memory type doesn't have the flags we want
- continue;
- }
- if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
- // Memory heap is not suitable for streaming
- continue;
- }
- // Success!
- return type_index;
- }
- return std::nullopt;
-}
-
-u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- bool try_device_local) {
- std::optional<u32> type;
- if (try_device_local) {
- // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
- type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
- if (type) {
- return *type;
- }
- }
- // Otherwise try without the DEVICE_LOCAL_BIT
- type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
- if (type) {
- return *type;
- }
- // This should never happen, and in case it does, signal it as an out of memory situation
- throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-}
-
size_t Region(size_t iterator) noexcept {
return iterator / REGION_SIZE;
}
@@ -87,58 +38,26 @@ size_t Region(size_t iterator) noexcept {
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
- const vk::Device& dev = device.GetLogical();
- stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+ VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- if (device.HasDebuggingToolAttached()) {
- stream_buffer.SetObjectNameEXT("Stream Buffer");
- }
- VkMemoryDedicatedRequirements dedicated_reqs{
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
- .pNext = nullptr,
- .prefersDedicatedAllocation = VK_FALSE,
- .requiresDedicatedAllocation = VK_FALSE,
- };
- const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
- const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
- dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
- const VkMemoryDedicatedAllocateInfo dedicated_info{
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = nullptr,
- .image = nullptr,
- .buffer = *stream_buffer,
};
- const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
- VkMemoryAllocateInfo stream_memory_info{
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = make_dedicated ? &dedicated_info : nullptr,
- .allocationSize = requirements.size,
- .memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
- };
- stream_memory = dev.TryAllocateMemory(stream_memory_info);
- if (!stream_memory) {
- LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
- stream_memory_info.memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
- stream_memory = dev.AllocateMemory(stream_memory_info);
+ if (device.IsExtTransformFeedbackSupported()) {
+ stream_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
-
+ stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream);
if (device.HasDebuggingToolAttached()) {
- stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+ stream_buffer.SetObjectNameEXT("Stream Buffer");
}
- stream_buffer.BindMemory(*stream_memory, 0);
- stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+ stream_pointer = stream_buffer.Mapped();
+ ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
@@ -199,7 +118,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
- .mapped_span = std::span<u8>(stream_pointer + offset, size),
+ .mapped_span = stream_pointer.subspan(offset, size),
.usage{},
.log2_level{},
.index{},
@@ -247,29 +166,29 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
- vk::Buffer buffer = device.GetLogical().CreateBuffer({
+ VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 1ULL << log2,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ if (device.IsExtTransformFeedbackSupported()) {
+ buffer_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+ }
+ vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage);
if (device.HasDebuggingToolAttached()) {
++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
}
- MemoryCommit commit = memory_allocator.Commit(buffer, usage);
- const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
-
+ const std::span<u8> mapped_span = buffer.Mapped();
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer),
- .commit = std::move(commit),
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 4fd15f11a..5f69f08b1 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -46,7 +46,6 @@ private:
struct StagingBuffer {
vk::Buffer buffer;
- MemoryCommit commit;
std::span<u8> mapped_span;
MemoryUsage usage;
u32 log2_level;
@@ -97,8 +96,7 @@ private:
Scheduler& scheduler;
vk::Buffer stream_buffer;
- vk::DeviceMemory stream_memory;
- u8* stream_pointer = nullptr;
+ std::span<u8> stream_pointer;
size_t iterator = 0;
size_t used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f025f618b..8385b5509 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,7 +15,6 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
-#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -163,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
+[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
+ const ImageInfo& info) {
if (info.type == ImageType::Buffer) {
return vk::Image{};
}
- return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
+ return allocator.CreateImage(MakeImageCreateInfo(device, info));
}
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
- std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
- std::vector<VkBufferCopy> result(copies.size());
+[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
+TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
+ boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
std::ranges::transform(
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return result;
}
-[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
+[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
struct Maker {
VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
VkImageAspectFlags aspect_mask;
};
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- std::vector<VkBufferImageCopy> result(copies.size() * 2);
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
std::ranges::transform(copies, result.begin(),
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
std::ranges::transform(copies, result.begin() + copies.size(),
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
return result;
} else {
- std::vector<VkBufferImageCopy> result(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
return result;
}
@@ -839,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
- if (buffer_commits[level]) {
+ if (buffers[level]) {
return *buffers[level];
}
const auto new_size = Common::NextPow2(needed_size);
static constexpr VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
- buffers[level] = device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo temp_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -855,9 +855,8 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- buffer_commits[level] = std::make_unique<MemoryCommit>(
- memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
+ };
+ buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal);
return *buffers[level];
}
@@ -867,8 +866,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
- std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
const VkImageAspectFlags src_aspect_mask = src.AspectMask();
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
@@ -1157,7 +1156,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkImageCopy> vk_copies(copies.size());
+ boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask());
@@ -1266,8 +1265,8 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
- runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
- commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
+ runtime{&runtime_},
+ original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.async_astc.GetValue()) {
@@ -1280,6 +1279,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
flags |= VideoCommon::ImageFlagBits::Converted;
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
+ if (IsPixelFormatBCn(info.format) && !runtime->device.IsOptimalBcnSupported()) {
+ flags |= VideoCommon::ImageFlagBits::Converted;
+ flags |= VideoCommon::ImageFlagBits::CostlyLoad;
+ }
if (runtime->device.HasDebuggingToolAttached()) {
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
@@ -1332,7 +1335,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
ScaleDown(true);
}
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+ auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1370,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
if (is_rescaled) {
ScaleDown();
}
- boost::container::small_vector<VkBuffer, 1> buffers_vector{};
- boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
+ boost::container::small_vector<VkBuffer, 8> buffers_vector{};
+ boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
+ vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
@@ -1467,9 +1471,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
- scaled_image = MakeImage(runtime->device, scaled_info);
- auto& allocator = runtime->memory_allocator;
- scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
+ scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info);
ignore = false;
}
current_image = *scaled_image;
@@ -1858,7 +1860,7 @@ Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, bool is_rescaled) {
- std::vector<VkImageView> attachments;
+ boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f14525dcb..220943116 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -116,7 +116,6 @@ public:
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
- std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
};
class Image : public VideoCommon::ImageBase {
@@ -180,12 +179,10 @@ private:
TextureCacheRuntime* runtime{};
vk::Image original_image;
- MemoryCommit commit;
std::vector<vk::ImageView> storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
vk::Image scaled_image{};
- MemoryCommit scaled_commit{};
VkImage current_image{};
std::unique_ptr<Framebuffer> scale_framebuffer;
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
index a802d3c49..460d8d59d 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -18,7 +18,7 @@ using namespace Common::Literals;
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
#ifndef ANDROID
- : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false}
+ : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device}
#endif
{
{
@@ -41,7 +41,7 @@ void TurboMode::Run(std::stop_token stop_token) {
auto& dld = m_device.GetLogical();
// Allocate buffer. 2MiB should be sufficient.
- auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -50,10 +50,8 @@ void TurboMode::Run(std::stop_token stop_token) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
-
- // Commit some device local memory for the buffer.
- auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+ };
+ vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
// Create the descriptor pool to contain our descriptor.
static constexpr VkDescriptorPoolSize pool_size{