summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp126
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h10
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp6
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp42
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h15
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp76
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h8
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp257
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h37
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp165
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h31
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp39
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp20
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp148
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp175
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h62
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp48
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.cpp491
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.h91
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp145
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp144
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_smaa.cpp53
-rw-r--r--src/video_core/renderer_vulkan/vk_smaa.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp105
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp156
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h41
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp227
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h42
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.cpp33
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h14
48 files changed, 1985 insertions, 1006 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index dd00d3edf..cf2964a3f 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -12,6 +12,8 @@
#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
+#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h"
+#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -69,10 +71,11 @@ constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CRE
.bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
.pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
};
-constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
- .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+template <VkShaderStageFlags stageFlags, size_t size>
+inline constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
+ .stageFlags = stageFlags,
.offset = 0,
- .size = sizeof(PushConstants),
+ .size = static_cast<u32>(size),
};
constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -125,10 +128,8 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
-constexpr std::array DYNAMIC_STATES{
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
-};
+constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS};
constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -205,15 +206,15 @@ inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
};
constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
- const VkDescriptorSetLayout* set_layout) {
+ const VkDescriptorSetLayout* set_layout, vk::Span<VkPushConstantRange> push_constants) {
return VkPipelineLayoutCreateInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .setLayoutCount = 1,
+ .setLayoutCount = (set_layout != nullptr ? 1u : 0u),
.pSetLayouts = set_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
+ .pushConstantRangeCount = push_constants.size(),
+ .pPushConstantRanges = push_constants.data(),
};
}
@@ -302,8 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
}
-void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
- const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
+void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) {
const VkOffset2D offset{
.x = std::min(dst_region.start.x, dst_region.end.x),
.y = std::min(dst_region.start.y, dst_region.end.y),
@@ -325,6 +325,13 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
.offset = offset,
.extent = extent,
};
+ cmdbuf.SetViewport(0, viewport);
+ cmdbuf.SetScissor(0, scissor);
+}
+
+void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
+ const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
+ BindBlitState(cmdbuf, dst_region);
const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) /
static_cast<float>(src_size.width);
const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) /
@@ -335,8 +342,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
static_cast<float>(src_region.start.y) /
static_cast<float>(src_size.height)},
};
- cmdbuf.SetViewport(0, viewport);
- cmdbuf.SetScissor(0, scissor);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
@@ -408,13 +413,20 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
two_textures_descriptor_allocator{
descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
- one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
- PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
- two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
- PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
+ one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo(
+ one_texture_set_layout.address(),
+ PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))),
+ two_textures_pipeline_layout(
+ device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo(
+ two_textures_set_layout.address(),
+ PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))),
+ clear_color_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo(
+ nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
+ clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
+ clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
@@ -553,6 +565,30 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
+void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
+ const std::array<f32, 4>& clear_color,
+ const Region2D& dst_region) {
+ const BlitImagePipelineKey key{
+ .renderpass = dst_framebuffer->RenderPass(),
+ .operation = Tegra::Engines::Fermi2D::Operation::BlendPremult,
+ };
+ const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key);
+ const VkPipelineLayout layout = *clear_color_pipeline_layout;
+ scheduler.RequestRenderpass(dst_framebuffer);
+ scheduler.Record(
+ [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ const std::array blend_color = {
+ (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f,
+ (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f};
+ cmdbuf.SetBlendConstants(blend_color.data());
+ BindBlitState(cmdbuf, dst_region);
+ cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color);
+ cmdbuf.Draw(3, 1, 0, 0);
+ });
+ scheduler.InvalidateState();
+}
+
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
@@ -728,6 +764,58 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip
return *blit_depth_stencil_pipelines.back();
}
+VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key) {
+ const auto it = std::ranges::find(clear_color_keys, key);
+ if (it != clear_color_keys.end()) {
+ return *clear_color_pipelines[std::distance(clear_color_keys.begin(), it)];
+ }
+ clear_color_keys.push_back(key);
+ const std::array stages = MakeStages(*clear_color_vert, *clear_color_frag);
+ const VkPipelineColorBlendAttachmentState color_blend_attachment_state{
+ .blendEnable = VK_TRUE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+ const VkPipelineColorBlendStateCreateInfo color_blend_state_generic_create_info{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_CLEAR,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment_state,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+ };
+ clear_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pColorBlendState = &color_blend_state_generic_create_info,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .layout = *clear_color_pipeline_layout,
+ .renderPass = key.renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ }));
+ return *clear_color_pipelines.back();
+}
+
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
bool is_target_depth) {
if (pipeline) {
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index be8a9a2f6..2976a7d91 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -61,6 +61,9 @@ public:
void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+ void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
+ const std::array<f32, 4>& clear_color, const Region2D& dst_region);
+
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -72,6 +75,8 @@ private:
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
+ [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key);
+
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
@@ -97,9 +102,12 @@ private:
DescriptorAllocator two_textures_descriptor_allocator;
vk::PipelineLayout one_texture_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;
+ vk::PipelineLayout clear_color_pipeline_layout;
vk::ShaderModule full_screen_vert;
vk::ShaderModule blit_color_to_color_frag;
vk::ShaderModule blit_depth_stencil_frag;
+ vk::ShaderModule clear_color_vert;
+ vk::ShaderModule clear_color_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
@@ -112,6 +120,8 @@ private:
std::vector<vk::Pipeline> blit_color_pipelines;
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
+ std::vector<BlitImagePipelineKey> clear_color_keys;
+ std::vector<vk::Pipeline> clear_color_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index f8398b511..e7df32d84 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -271,7 +271,7 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
- // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
+ // If we subtract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
// Perfect for a hash.
const u32 value = static_cast<u32>(op);
return value - (value >= 0x200 ? 0x200 : 1);
@@ -322,8 +322,8 @@ Maxwell::StencilOp::Op FixedPipelineState::UnpackStencilOp(u32 packed) noexcept
}
u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
- // FrontAndBack is 0x408, by substracting 0x406 in it we get 2.
- // Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1.
+ // FrontAndBack is 0x408, by subtracting 0x406 in it we get 2.
+ // Individual cull faces are in 0x404 and 0x405, subtracting 0x404 we get 0 and 1.
const u32 value = static_cast<u32>(cull);
return value - (value == 0x408 ? 0x406 : 0x404);
}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index ca52e2389..9a0b10568 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/surface.h"
@@ -166,7 +167,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
- {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
+ {VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
@@ -197,10 +198,13 @@ struct FormatTuple {
{VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
{VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
{VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM
+ {VK_FORMAT_ASTC_10x6_SRGB_BLOCK}, // ASTC_2D_10X6_SRGB
{VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM
{VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB
{VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
{VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
+ {VK_FORMAT_ASTC_12x10_UNORM_BLOCK}, // ASTC_2D_12X10_UNORM
+ {VK_FORMAT_ASTC_12x10_SRGB_BLOCK}, // ASTC_2D_12X10_SRGB
{VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
{VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
{VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM
@@ -234,19 +238,25 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
- if (tuple.format == VK_FORMAT_UNDEFINED) {
- UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
- return FormatInfo{VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
- }
-
- // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
+ // Transcode on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
- if (is_srgb) {
- tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
- } else {
- tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- tuple.usage |= Storage;
+
+ switch (Settings::values.astc_recompression.GetValue()) {
+ case Settings::AstcRecompression::Uncompressed:
+ if (is_srgb) {
+ tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
+ } else {
+ tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ tuple.usage |= Storage;
+ }
+ break;
+ case Settings::AstcRecompression::Bc1:
+ tuple.format = is_srgb ? VK_FORMAT_BC1_RGBA_SRGB_BLOCK : VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
+ break;
+ case Settings::AstcRecompression::Bc3:
+ tuple.format = is_srgb ? VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK;
+ break;
}
}
const bool attachable = (tuple.usage & Attachable) != 0;
@@ -337,6 +347,14 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size) {
+ if (device.MustEmulateScaledFormats()) {
+ if (type == Maxwell::VertexAttribute::Type::SScaled) {
+ type = Maxwell::VertexAttribute::Type::SInt;
+ } else if (type == Maxwell::VertexAttribute::Type::UScaled) {
+ type = Maxwell::VertexAttribute::Type::UInt;
+ }
+ }
+
const VkFormat format{([&]() {
switch (type) {
case Maxwell::VertexAttribute::Type::UnusedEnumDoNotUseBecauseItWillGoAway:
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 28b893e25..71c783709 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -176,9 +176,9 @@ public:
};
inline void PushImageDescriptors(TextureCache& texture_cache,
- UpdateDescriptorQueue& update_descriptor_queue,
+ GuestDescriptorQueue& guest_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
- const VkSampler*& samplers,
+ const VideoCommon::SamplerId*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
@@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const VideoCommon::ImageViewId image_view_id{(views++)->id};
- const VkSampler sampler{*(samplers++)};
+ const VideoCommon::SamplerId sampler_id{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
- update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+ const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
+ : sampler.Handle()};
+ guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
@@ -201,7 +206,7 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
- update_descriptor_queue.AddImage(vk_image_view);
+ guest_descriptor_queue.AddImage(vk_image_view);
rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2a8d9e377..ddf28ca28 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -16,7 +16,7 @@
#include "common/settings.h"
#include "common/telemetry.h"
#include "core/core_timing.h"
-#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -84,17 +84,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
- cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
- instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
+ cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
+ instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
- surface(CreateSurface(instance, render_window)),
- device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
- state_tracker(), scheduler(device, state_tracker),
+ surface(CreateSurface(instance, render_window.GetWindowInfo())),
+ device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
+ scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
- blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
- screen_info),
+ present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
+ surface),
+ blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
+ scheduler, screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
@@ -121,46 +123,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
return;
}
// Update screen info if the framebuffer size has changed.
- if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
- screen_info.width = framebuffer->width;
- screen_info.height = framebuffer->height;
- }
+ screen_info.width = framebuffer->width;
+ screen_info.height = framebuffer->height;
+
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
- bool has_been_recreated = false;
- const auto recreate_swapchain = [&](u32 width, u32 height) {
- if (!has_been_recreated) {
- has_been_recreated = true;
- scheduler.Finish();
- }
- swapchain.Create(width, height, is_srgb);
- };
-
- const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
- if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
- swapchain.GetHeight() != layout.height) {
- recreate_swapchain(layout.width, layout.height);
- }
- bool is_outdated;
- do {
- swapchain.AcquireNextImage();
- is_outdated = swapchain.IsOutDated();
- if (is_outdated) {
- recreate_swapchain(layout.width, layout.height);
- }
- } while (is_outdated);
- if (has_been_recreated) {
- blit_screen.Recreate();
- }
- const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
- const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
- scheduler.Flush(render_semaphore, present_semaphore);
- scheduler.WaitWorker();
- swapchain.Present(render_semaphore);
+ Frame* frame = present_manager.GetRenderFrame();
+ blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
+ scheduler.Flush(*frame->render_ready);
+ present_manager.Present(frame);
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
@@ -198,7 +173,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
- vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
+ vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -221,7 +196,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
- // Since we're not rendering to the screen, ignore the render semaphore.
- void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
+ blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
@@ -260,8 +233,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
- const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
- MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
+ const vk::Buffer dst_buffer =
+ memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
@@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
- .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -335,8 +308,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
- const auto dst_memory_map = dst_buffer_memory.Map();
- std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
+ dst_buffer.Invalidate();
+ std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
+ dst_buffer.Mapped().size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 009e75e0d..b2e8cbd1b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,6 +9,7 @@
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -53,6 +54,10 @@ public:
return device.GetDriverName();
}
+ void NotifySurfaceChanged() override {
+ present_manager.NotifySurfaceChanged();
+ }
+
private:
void Report() const;
@@ -62,7 +67,7 @@ private:
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
- Common::DynamicLibrary library;
+ std::shared_ptr<Common::DynamicLibrary> library;
vk::InstanceDispatch dld;
vk::Instance instance;
@@ -76,6 +81,7 @@ private:
StateTracker state_tracker;
Scheduler scheduler;
Swapchain swapchain;
+ PresentManager present_manager;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 2f0cc27e8..ad3b29f0e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -74,7 +74,7 @@ struct ScreenRectVertex {
}
};
-constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
+std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
// clang-format off
return { 2.f / width, 0.f, 0.f, 0.f,
0.f, 2.f / height, 0.f, 0.f,
@@ -122,10 +122,12 @@ struct BlitScreen::BufferData {
BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
const Device& device_, MemoryAllocator& memory_allocator_,
- Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_)
+ Swapchain& swapchain_, PresentManager& present_manager_,
+ Scheduler& scheduler_, const ScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
- memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
- image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+ memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
+ scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_},
+ current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} {
resource_ticks.resize(image_count);
CreateStaticResources();
@@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
BlitScreen::~BlitScreen() = default;
void BlitScreen::Recreate() {
+ present_manager.WaitPresent();
+ scheduler.Finish();
+ device.GetLogical().WaitIdle();
CreateDynamicResources();
}
-VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
- const VkFramebuffer& host_framebuffer,
- const Layout::FramebufferLayout layout, VkExtent2D render_area,
- bool use_accelerated) {
+void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
+ const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout,
+ VkExtent2D render_area, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
- if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
- image_count = swapchain_images;
- Recreate();
- }
-
- const std::size_t image_index = swapchain.GetImageIndex();
-
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
@@ -165,11 +162,11 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
SetUniformData(data, layout);
SetVertexData(data, framebuffer, layout);
- const std::span<u8> mapped_span = buffer_commit.Map();
+ const std::span<u8> mapped_span = buffer.Mapped();
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
- const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
+ const u64 image_offset = GetRawImageOffset(framebuffer);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
@@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.depth = 1,
},
};
- scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
- const VkImage image = *raw_images[image_index];
+ scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
+ const VkImage image = *raw_images[index];
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) {
- UpdateAADescriptorSet(image_index, source_image_view, false);
+ UpdateAADescriptorSet(source_image_view, false);
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
- scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, index = image_index, size,
+ anti_alias_pass](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
- aa_descriptor_sets[image_index], {});
+ aa_descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
@@ -369,81 +367,104 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
- UpdateDescriptorSet(image_index, fsr_image_view, true);
+ UpdateDescriptorSet(fsr_image_view, true);
} else {
const bool is_nn =
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
- UpdateDescriptorSet(image_index, source_image_view, is_nn);
+ UpdateDescriptorSet(source_image_view, is_nn);
}
- scheduler.Record(
- [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
- const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
- const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
- const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
- const VkClearValue clear_color{
- .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
- };
- const VkRenderPassBeginInfo renderpass_bi{
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .pNext = nullptr,
- .renderPass = *renderpass,
- .framebuffer = host_framebuffer,
- .renderArea =
- {
- .offset = {0, 0},
- .extent = size,
- },
- .clearValueCount = 1,
- .pClearValues = &clear_color,
- };
- const VkViewport viewport{
- .x = 0.0f,
- .y = 0.0f,
- .width = static_cast<float>(size.width),
- .height = static_cast<float>(size.height),
- .minDepth = 0.0f,
- .maxDepth = 1.0f,
- };
- const VkRect2D scissor{
- .offset = {0, 0},
- .extent = size,
- };
- cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
- auto graphics_pipeline = [this]() {
- switch (Settings::values.scaling_filter.GetValue()) {
- case Settings::ScalingFilter::NearestNeighbor:
- case Settings::ScalingFilter::Bilinear:
- return *bilinear_pipeline;
- case Settings::ScalingFilter::Bicubic:
- return *bicubic_pipeline;
- case Settings::ScalingFilter::Gaussian:
- return *gaussian_pipeline;
- case Settings::ScalingFilter::ScaleForce:
- return *scaleforce_pipeline;
- default:
- return *bilinear_pipeline;
- }
- }();
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
- cmdbuf.SetViewport(0, viewport);
- cmdbuf.SetScissor(0, scissor);
-
- cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
- descriptor_sets[image_index], {});
- cmdbuf.Draw(4, 1, 0, 0);
- cmdbuf.EndRenderPass();
- });
- return *semaphores[image_index];
+ scheduler.Record([this, host_framebuffer, index = image_index,
+ size = render_area](vk::CommandBuffer cmdbuf) {
+ const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
+ const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
+ const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
+ const VkClearValue clear_color{
+ .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
+ };
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = *renderpass,
+ .framebuffer = host_framebuffer,
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = size,
+ },
+ .clearValueCount = 1,
+ .pClearValues = &clear_color,
+ };
+ const VkViewport viewport{
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = static_cast<float>(size.width),
+ .height = static_cast<float>(size.height),
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ const VkRect2D scissor{
+ .offset = {0, 0},
+ .extent = size,
+ };
+ cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
+ auto graphics_pipeline = [this]() {
+ switch (Settings::values.scaling_filter.GetValue()) {
+ case Settings::ScalingFilter::NearestNeighbor:
+ case Settings::ScalingFilter::Bilinear:
+ return *bilinear_pipeline;
+ case Settings::ScalingFilter::Bicubic:
+ return *bicubic_pipeline;
+ case Settings::ScalingFilter::Gaussian:
+ return *gaussian_pipeline;
+ case Settings::ScalingFilter::ScaleForce:
+ return *scaleforce_pipeline;
+ default:
+ return *bilinear_pipeline;
+ }
+ }();
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
+ cmdbuf.SetViewport(0, viewport);
+ cmdbuf.SetScissor(0, scissor);
+
+ cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+ descriptor_sets[index], {});
+ cmdbuf.Draw(4, 1, 0, 0);
+ cmdbuf.EndRenderPass();
+ });
}
-VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated) {
- const std::size_t image_index = swapchain.GetImageIndex();
- const VkExtent2D render_area = swapchain.GetSize();
+void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
+ bool use_accelerated, bool is_srgb) {
+ // Recreate dynamic resources if the the image count or colorspace changed
+ if (const std::size_t swapchain_images = swapchain.GetImageCount();
+ swapchain_images != image_count || current_srgb != is_srgb) {
+ current_srgb = is_srgb;
+#ifdef ANDROID
+ // Android is already ordered the same as Switch.
+ image_view_format = current_srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
+#else
+ image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
+#endif
+ image_count = swapchain_images;
+ Recreate();
+ }
+
+ // Recreate the presentation frame if the dimensions of the window changed
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
- return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
+ if (layout.width != frame->width || layout.height != frame->height ||
+ is_srgb != frame->is_srgb) {
+ Recreate();
+ present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb,
+ image_view_format, *renderpass);
+ }
+
+ const VkExtent2D render_area{frame->width, frame->height};
+ Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated);
+ if (++image_index >= image_count) {
+ image_index = 0;
+ }
}
vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
@@ -471,13 +492,11 @@ void BlitScreen::CreateStaticResources() {
}
void BlitScreen::CreateDynamicResources() {
- CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreateRenderPass();
- CreateFramebuffers();
CreateGraphicsPipeline();
fsr.reset();
smaa.reset();
@@ -525,11 +544,6 @@ void BlitScreen::CreateShaders() {
}
}
-void BlitScreen::CreateSemaphores() {
- semaphores.resize(image_count);
- std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
-}
-
void BlitScreen::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
@@ -571,10 +585,10 @@ void BlitScreen::CreateDescriptorPool() {
}
void BlitScreen::CreateRenderPass() {
- renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
+ renderpass = CreateRenderPassImpl(image_view_format);
}
-vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
+vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = format,
@@ -584,7 +598,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@@ -1052,29 +1066,14 @@ void BlitScreen::CreateSampler() {
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
-void BlitScreen::CreateFramebuffers() {
- const VkExtent2D size{swapchain.GetSize()};
- framebuffers.resize(image_count);
-
- for (std::size_t i = 0; i < image_count; ++i) {
- const VkImageView image_view{swapchain.GetImageViewIndex(i)};
- framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
- }
-}
-
void BlitScreen::ReleaseRawImages() {
for (const u64 tick : resource_ticks) {
scheduler.Wait(tick);
}
raw_images.clear();
- raw_buffer_commits.clear();
-
aa_image_view.reset();
aa_image.reset();
- aa_commit = MemoryCommit{};
-
buffer.reset();
- buffer_commit = MemoryCommit{};
}
void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@@ -1090,25 +1089,23 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer
.pQueueFamilyIndices = nullptr,
};
- buffer = device.GetLogical().CreateBuffer(ci);
- buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
+ buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload);
}
void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
- raw_buffer_commits.resize(image_count);
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
u32 down_shift = 0) {
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- return device.GetLogical().CreateImage(VkImageCreateInfo{
+ return memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
- .format = GetFormat(framebuffer),
+ .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer),
.extent =
{
.width = (up_scale * framebuffer.width) >> down_shift,
@@ -1126,17 +1123,14 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
};
- const auto create_commit = [&](vk::Image& image) {
- return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
- };
- const auto create_image_view = [&](vk::Image& image) {
+ const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = GetFormat(framebuffer),
+ .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer),
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1157,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = create_image();
- raw_buffer_commits[i] = create_commit(raw_images[i]);
raw_image_views[i] = create_image_view(raw_images[i]);
}
@@ -1165,8 +1158,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
- aa_commit = create_commit(aa_image);
- aa_image_view = create_image_view(aa_image);
+ aa_image_view = create_image_view(aa_image, true);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
@@ -1175,7 +1167,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
return;
}
- aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
+ aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer));
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
@@ -1319,8 +1311,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
-void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
- bool nn) const {
+void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
@@ -1356,8 +1347,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
-void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
- bool nn) const {
+void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -1480,8 +1470,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer)
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
-u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
- std::size_t image_index) const {
+u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index ebe10b08b..8365b5668 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,6 +5,7 @@
#include <memory>
+#include "core/frontend/framebuffer_layout.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -42,6 +43,9 @@ class RasterizerVulkan;
class Scheduler;
class SMAA;
class Swapchain;
+class PresentManager;
+
+struct Frame;
struct ScreenInfo {
VkImage image{};
@@ -55,18 +59,17 @@ class BlitScreen {
public:
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
- Scheduler& scheduler, const ScreenInfo& screen_info);
+ PresentManager& present_manager, Scheduler& scheduler,
+ const ScreenInfo& screen_info);
~BlitScreen();
void Recreate();
- [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
- const VkFramebuffer& host_framebuffer,
- const Layout::FramebufferLayout layout, VkExtent2D render_area,
- bool use_accelerated);
+ void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer,
+ const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated);
- [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated);
+ void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
+ bool use_accelerated, bool is_srgb);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
@@ -79,10 +82,9 @@ private:
void CreateStaticResources();
void CreateShaders();
- void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
- vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
+ vk::RenderPass CreateRenderPassImpl(VkFormat format);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@@ -90,15 +92,14 @@ private:
void CreateSampler();
void CreateDynamicResources();
- void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
- void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
- void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
+ void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
+ void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
@@ -107,16 +108,17 @@ private:
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
- u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
- std::size_t image_index) const;
+ u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Swapchain& swapchain;
+ PresentManager& present_manager;
Scheduler& scheduler;
std::size_t image_count;
+ std::size_t image_index{};
const ScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
@@ -135,20 +137,16 @@ private:
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
- std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
vk::Buffer buffer;
- MemoryCommit buffer_commit;
std::vector<u64> resource_ticks;
- std::vector<vk::Semaphore> semaphores;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
- std::vector<MemoryCommit> raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
@@ -159,11 +157,12 @@ private:
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
- MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;
Service::android::PixelFormat pixel_format{};
+ bool current_srgb;
+ VkFormat image_view_format;
std::unique_ptr<FSR> fsr;
std::unique_ptr<SMAA> smaa;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1cfb4c2ff..660f7c9ff 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,7 +7,6 @@
#include <span>
#include <vector>
-#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -51,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
}
}
-vk::Buffer CreateBuffer(const Device& device, u64 size) {
+vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
@@ -61,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
- return device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -70,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
} // Anonymous namespace
@@ -80,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
- device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
- commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
+ device{&runtime.device}, buffer{
+ CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@@ -139,7 +139,7 @@ public:
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
- buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -148,14 +148,21 @@ public:
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT("Quad LUT");
}
- memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
- const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
- u8* staging_data = staging.mapped_span.data();
+ const bool host_visible = buffer.IsHostVisible();
+ const StagingBufferRef staging = [&] {
+ if (host_visible) {
+ return StagingBufferRef{};
+ }
+ return staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ }();
+
+ u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@@ -165,29 +172,33 @@ public:
}
}
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
- dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
- const VkBufferCopy copy{
- .srcOffset = src_offset,
- .dstOffset = 0,
- .size = size_bytes,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = dst_buffer,
- .offset = 0,
- .size = size_bytes,
- };
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
- });
+ if (!host_visible) {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+ dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = src_offset,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+ });
+ } else {
+ buffer.Flush();
+ }
}
void BindBuffer(u32 first) {
@@ -238,7 +249,7 @@ private:
return indices;
}
- void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+ void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
@@ -278,7 +289,7 @@ private:
return indices;
}
- void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+ void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
@@ -298,12 +309,18 @@ private:
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ GuestDescriptorQueue& guest_descriptor_queue_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
- staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
- uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {
+ staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
+ quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
+ compute_pass_descriptor_queue) {
+ if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+ // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
+ uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
+ compute_pass_descriptor_queue);
+ }
quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
scheduler_, staging_pool_);
quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
@@ -314,8 +331,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Upload);
}
-StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
- return staging_pool.Request(size, MemoryUsage::Download);
+StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+ return staging_pool.Request(size, MemoryUsage::Download, deferred);
+}
+
+void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
+ staging_pool.FreeDeferred(ref);
}
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
@@ -330,10 +351,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
-u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
- return static_cast<u32>(device.GetStorageBufferAlignment());
-}
-
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
@@ -356,7 +373,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
- boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
@@ -440,7 +457,9 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
topology == PrimitiveTopology::QuadStrip);
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
vk_index_type = VK_INDEX_TYPE_UINT16;
- std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+ if (uint8_pass) {
+ std::tie(vk_buffer, vk_offset) = uint8_pass->Assemble(num_indices, buffer, offset);
+ }
}
if (vk_buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
@@ -494,6 +513,36 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
}
}
+void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
+ boost::container::small_vector<VkBuffer, 32> buffer_handles;
+ for (u32 index = 0; index < bindings.buffers.size(); ++index) {
+ auto handle = bindings.buffers[index]->Handle();
+ if (handle == VK_NULL_HANDLE) {
+ bindings.offsets[index] = 0;
+ bindings.sizes[index] = VK_WHOLE_SIZE;
+ if (!device.HasNullDescriptor()) {
+ ReserveNullBuffer();
+ handle = *null_buffer;
+ }
+ }
+ buffer_handles.push_back(handle);
+ }
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers2EXT(
+ bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
+ bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data());
+ });
+ } else {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index,
+ buffer_handles.data(), bindings.offsets.data());
+ });
+ }
+}
+
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
u32 size) {
if (!device.IsExtTransformFeedbackSupported()) {
@@ -515,6 +564,23 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
+void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
+ if (!device.IsExtTransformFeedbackSupported()) {
+ // Already logged in the rasterizer
+ return;
+ }
+ boost::container::small_vector<VkBuffer, 4> buffer_handles;
+ for (u32 index = 0; index < bindings.buffers.size(); ++index) {
+ buffer_handles.push_back(bindings.buffers[index]->Handle());
+ }
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
+ buffer_handles.data(), bindings.offsets.data(),
+ bindings.sizes.data());
+ });
+}
+
void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
@@ -533,11 +599,10 @@ void BufferCacheRuntime::ReserveNullBuffer() {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
- null_buffer = device.GetLogical().CreateBuffer(create_info);
+ null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
}
- null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 06539c733..95446c732 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -3,7 +3,8 @@
#pragma once
-#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/buffer_cache/buffer_cache_base.h"
+#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -17,6 +18,7 @@ namespace Vulkan {
class Device;
class DescriptorPool;
class Scheduler;
+struct HostVertexBinding;
class BufferCacheRuntime;
@@ -46,7 +48,6 @@ private:
const Device* device{};
vk::Buffer buffer;
- MemoryCommit commit;
std::vector<BufferView> views;
};
@@ -62,7 +63,8 @@ class BufferCacheRuntime {
public:
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ GuestDescriptorQueue& guest_descriptor_queue,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool);
void Finish();
@@ -73,11 +75,11 @@ public:
bool CanReportMemoryUsage() const;
- u32 GetStorageBufferAlignment() const;
-
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
- [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
+
+ void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void PreCopyBarrier();
@@ -95,8 +97,12 @@ public:
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
+ void BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings);
+
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+ void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
+
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
[[maybe_unused]] u32 binding_index, u32 size) {
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
@@ -115,12 +121,12 @@ public:
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format) {
- update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
+ guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
- update_descriptor_queue.AddBuffer(buffer, offset, size);
+ guest_descriptor_queue.AddBuffer(buffer, offset, size);
}
void ReserveNullBuffer();
@@ -129,21 +135,22 @@ private:
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
StagingBufferPool& staging_pool;
- UpdateDescriptorQueue& update_descriptor_queue;
+ GuestDescriptorQueue& guest_descriptor_queue;
std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer;
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
- MemoryCommit null_buffer_commit;
- Uint8Pass uint8_pass;
+ std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
};
struct BufferCacheParams {
using Runtime = Vulkan::BufferCacheRuntime;
using Buffer = Vulkan::Buffer;
+ using Async_Buffer = Vulkan::StagingBufferRef;
+ using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
static constexpr bool IS_OPENGL = false;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
@@ -152,6 +159,8 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
+ static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
new file mode 100644
index 000000000..f9e271507
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+
+namespace VideoCommon {
+template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>;
+}
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
index 2f09de1c1..d0dbf7ca5 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -22,8 +22,8 @@ CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& devic
CommandPool::~CommandPool() = default;
void CommandPool::Allocate(size_t begin, size_t end) {
- // Command buffers are going to be commited, recorded, executed every single usage cycle.
- // They are also going to be reseted when commited.
+ // Command buffers are going to be committed, recorded, executed every single usage cycle.
+ // They are also going to be reset when committed.
Pool& pool = pools.emplace_back();
pool.handle = device.GetLogical().CreateCommandPool({
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 1a316b6eb..3bc8553e1 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -200,12 +200,12 @@ ComputePass::~ComputePass() = default;
Uint8Pass::Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool,
StagingBufferPool& staging_buffer_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_)
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
: ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
- update_descriptor_queue{update_descriptor_queue_} {}
+ compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
Uint8Pass::~Uint8Pass() = default;
@@ -214,10 +214,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
- update_descriptor_queue.Acquire();
- update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
- update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
- const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ compute_pass_descriptor_queue.Acquire();
+ compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
+ compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
+ const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
@@ -242,12 +242,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_)
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
- update_descriptor_queue{update_descriptor_queue_} {}
+ compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
QuadIndexedPass::~QuadIndexedPass() = default;
@@ -272,10 +272,10 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
- update_descriptor_queue.Acquire();
- update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
- update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
- const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ compute_pass_descriptor_queue.Acquire();
+ compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
+ compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
+ const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift,
@@ -304,13 +304,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_,
MemoryAllocator& memory_allocator_)
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
- update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
+ compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{
+ memory_allocator_} {}
ASTCDecoderPass::~ASTCDecoderPass() = default;
@@ -358,11 +359,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
- update_descriptor_queue.Acquire();
- update_descriptor_queue.AddBuffer(map.buffer, input_offset,
- image.guest_size_bytes - swizzle.buffer_offset);
- update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
- const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ compute_pass_descriptor_queue.Acquire();
+ compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
+ const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
// To unswizzle the ASTC data
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index c4c8fa081..dd3927376 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -21,7 +22,6 @@ namespace Vulkan {
class Device;
class StagingBufferPool;
class Scheduler;
-class UpdateDescriptorQueue;
class Image;
struct StagingBufferRef;
@@ -50,7 +50,7 @@ class Uint8Pass final : public ComputePass {
public:
explicit Uint8Pass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_);
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
~Uint8Pass();
/// Assemble uint8 indices into an uint16 index buffer
@@ -61,7 +61,7 @@ public:
private:
Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
- UpdateDescriptorQueue& update_descriptor_queue;
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue;
};
class QuadIndexedPass final : public ComputePass {
@@ -69,7 +69,7 @@ public:
explicit QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_);
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
~QuadIndexedPass();
std::pair<VkBuffer, VkDeviceSize> Assemble(
@@ -79,7 +79,7 @@ public:
private:
Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
- UpdateDescriptorQueue& update_descriptor_queue;
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue;
};
class ASTCDecoderPass final : public ComputePass {
@@ -87,7 +87,7 @@ public:
explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_,
MemoryAllocator& memory_allocator_);
~ASTCDecoderPass();
@@ -97,7 +97,7 @@ public:
private:
Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
- UpdateDescriptorQueue& update_descriptor_queue;
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue;
MemoryAllocator& memory_allocator;
};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 2a0f0dbf0..73e585c2b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -26,13 +26,13 @@ using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ GuestDescriptorQueue& guest_descriptor_queue_,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
vk::ShaderModule spv_module_)
- : device{device_}, pipeline_cache(pipeline_cache_),
- update_descriptor_queue{update_descriptor_queue_}, info{info_},
+ : device{device_},
+ pipeline_cache(pipeline_cache_), guest_descriptor_queue{guest_descriptor_queue_}, info{info_},
spv_module(std::move(spv_module_)) {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
@@ -99,7 +99,7 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache) {
- update_descriptor_queue.Acquire();
+ guest_descriptor_queue.Acquire();
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();
@@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
static constexpr size_t max_elements = 64;
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
- boost::container::static_vector<VkSampler, max_elements> samplers;
+ boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers;
const auto& qmd{kepler_compute.launch_description};
const auto& cbufs{qmd.const_buffer_config};
@@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
- samplers.push_back(sampler->Handle());
+ VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
+ samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
@@ -192,9 +192,9 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
buffer_cache.BindHostComputeBuffers();
RescalingPushConstant rescaling;
- const VkSampler* samplers_it{samplers.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
- PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
+ PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it,
views_it);
if (!is_built.load(std::memory_order::relaxed)) {
@@ -204,7 +204,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
}
- const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ const void* const descriptor_data{guest_descriptor_queue.UpdateData()};
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
scheduler.Record([this, descriptor_data, is_rescaling,
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 78d77027f..d1a1e2c46 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -30,7 +30,7 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache,
DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue,
+ GuestDescriptorQueue& guest_descriptor_queue,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
@@ -48,7 +48,7 @@ public:
private:
const Device& device;
vk::PipelineCache& pipeline_cache;
- UpdateDescriptorQueue& update_descriptor_queue;
+ GuestDescriptorQueue& guest_descriptor_queue;
Shader::Info info;
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 0214b103a..fad9e3832 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -5,6 +5,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7fe2afcd9..145359d4e 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -40,7 +40,16 @@ private:
};
using Fence = std::shared_ptr<InnerFence>;
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
+struct FenceManagerParams {
+ using FenceType = Fence;
+ using BufferCacheType = BufferCache;
+ using TextureCacheType = TextureCache;
+ using QueryCacheType = QueryCache;
+
+ static constexpr bool HAS_ASYNC_CHECK = true;
+};
+
+using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
class FenceManager final : public GenericFenceManager {
public:
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index 33daa8c1c..9bcdca2fb 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -1,12 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <cmath>
-#include "common/bit_cast.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/settings.h"
+#include "video_core/fsr.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
@@ -17,146 +16,7 @@
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
-namespace {
-// Reimplementations of the constant generating functions in ffx_fsr1.h
-// GCC generated a lot of warnings when using the official header.
-u32 AU1_AH1_AF1(f32 f) {
- static constexpr u32 base[512]{
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
- 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
- 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
- 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
- 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
- 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
- 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
- 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- };
- static constexpr s8 shift[512]{
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
- 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
- 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18,
- };
- const u32 u = Common::BitCast<u32>(f);
- const u32 i = u >> 23;
- return base[i] + ((u & 0x7fffff) >> shift[i]);
-}
-
-u32 AU1_AH2_AF2(f32 a[2]) {
- return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16);
-}
-
-void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
- f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
- f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
- con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX);
- con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY);
- con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
- con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
- con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
- con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY);
- con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
- con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY);
- con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX);
- con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
- con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
- con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
- con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX);
- con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY);
- con3[2] = con3[3] = 0;
-}
-
-void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
- f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
- f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
- f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
- FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
- inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
- con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f +
- inputOffsetInPixelsX);
- con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f +
- inputOffsetInPixelsY);
-}
-
-void FsrRcasCon(u32* con, f32 sharpness) {
- sharpness = std::exp2f(-sharpness);
- f32 hSharp[2]{sharpness, sharpness};
- con[0] = Common::BitCast<u32>(sharpness);
- con[1] = AU1_AH2_AF2(hSharp);
- con[2] = 0;
- con[3] = 0;
-}
-} // Anonymous namespace
+using namespace FSR;
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
VkExtent2D output_size_)
@@ -345,10 +205,9 @@ void FSR::CreateDescriptorSets() {
void FSR::CreateImages() {
images.resize(image_count * 2);
image_views.resize(image_count * 2);
- buffer_commits.resize(image_count * 2);
for (size_t i = 0; i < image_count * 2; ++i) {
- images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+ images[i] = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -371,7 +230,6 @@ void FSR::CreateImages() {
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 5d872861f..8bb9fc23a 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -47,7 +47,6 @@ private:
vk::Sampler sampler;
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
- std::vector<MemoryCommit> buffer_commits;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index f91bb5a1d..c1595642e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -236,13 +236,13 @@ GraphicsPipeline::GraphicsPipeline(
Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify,
const Device& device_, DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
+ GuestDescriptorQueue& guest_descriptor_queue_, Common::ThreadWorker* worker_thread,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos)
: key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
pipeline_cache(pipeline_cache_), scheduler{scheduler_},
- update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
+ guest_descriptor_queue{guest_descriptor_queue_}, spv_modules{std::move(stages)} {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
}
@@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
- std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
+ std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers;
size_t sampler_index{};
size_t view_index{};
@@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[view_index++] = {handle.first};
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
- samplers[sampler_index++] = sampler->Handle();
+ VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
+ samplers[sampler_index++] = sampler;
}
}
if constexpr (Spec::has_images) {
@@ -449,15 +449,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
- update_descriptor_queue.Acquire();
+ guest_descriptor_queue.Acquire();
RescalingPushConstant rescaling;
RenderAreaPushConstant render_area;
- const VkSampler* samplers_it{samplers.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
buffer_cache.BindHostStageBuffers(stage);
- PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
+ PushImageDescriptors(texture_cache, guest_descriptor_queue, stage_infos[stage], rescaling,
samplers_it, views_it);
const auto& info{stage_infos[0]};
if (info.uses_render_area) {
@@ -481,12 +481,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
+ texture_cache.UpdateRenderTargets(false);
+ texture_cache.CheckFeedbackLoop(views);
ConfigureDraw(rescaling, render_area);
}
void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
const RenderAreaPushConstant& render_area) {
- texture_cache.UpdateRenderTargets(false);
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
if (!is_built.load(std::memory_order::relaxed)) {
@@ -499,7 +500,7 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
const bool is_rescaling{texture_cache.IsRescaling()};
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
- const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ const void* const descriptor_data{guest_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
is_rescaling, update_rescaling,
uses_render_area = render_area.uses_render_area,
@@ -548,31 +549,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
- if (key.state.dynamic_vertex_input) {
- const size_t num_vertex_arrays = std::min(
- key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings()));
- for (size_t index = 0; index < num_vertex_arrays; ++index) {
- const u32 type = key.state.DynamicAttributeType(index);
- if (!stage_infos[0].loads.Generic(index) || type == 0) {
- continue;
- }
- vertex_attributes.push_back({
- .location = static_cast<u32>(index),
- .binding = 0,
- .format = type == 1 ? VK_FORMAT_R32_SFLOAT
- : type == 2 ? VK_FORMAT_R32_SINT
- : VK_FORMAT_R32_UINT,
- .offset = 0,
- });
- }
- if (!vertex_attributes.empty()) {
- vertex_bindings.push_back({
- .binding = 0,
- .stride = 4,
- .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
- });
- }
- } else {
+ if (!key.state.dynamic_vertex_input) {
const size_t num_vertex_arrays = std::min(
Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings()));
for (size_t index = 0; index < num_vertex_arrays; ++index) {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 67c657d0e..99e56e9ad 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -64,7 +64,6 @@ class RenderPassCache;
class RescalingPushConstant;
class RenderAreaPushConstant;
class Scheduler;
-class UpdateDescriptorQueue;
class GraphicsPipeline {
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
@@ -74,7 +73,7 @@ public:
Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify,
const Device& device, DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread,
+ GuestDescriptorQueue& guest_descriptor_queue, Common::ThreadWorker* worker_thread,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos);
@@ -133,7 +132,7 @@ private:
BufferCache& buffer_cache;
vk::PipelineCache& pipeline_cache;
Scheduler& scheduler;
- UpdateDescriptorQueue& update_descriptor_queue;
+ GuestDescriptorQueue& guest_descriptor_queue;
void (*configure_func)(GraphicsPipeline*, bool){};
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 8aa07ef9d..6b288b994 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -3,6 +3,7 @@
#include <thread>
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -10,7 +11,19 @@
namespace Vulkan {
-MasterSemaphore::MasterSemaphore(const Device& device) {
+constexpr u64 FENCE_RESERVE_SIZE = 8;
+
+MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
+ if (!device.HasTimelineSemaphore()) {
+ static constexpr VkFenceCreateInfo fence_ci{
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
+ free_queue.resize(FENCE_RESERVE_SIZE);
+ std::ranges::generate(free_queue,
+ [&] { return device.GetLogical().CreateFence(fence_ci); });
+ wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
+ return;
+ }
+
static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.pNext = nullptr,
@@ -42,4 +55,164 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
MasterSemaphore::~MasterSemaphore() = default;
+void MasterSemaphore::Refresh() {
+ if (!semaphore) {
+ // If we don't support timeline semaphores, there's nothing to refresh
+ return;
+ }
+
+ u64 this_tick{};
+ u64 counter{};
+ do {
+ this_tick = gpu_tick.load(std::memory_order_acquire);
+ counter = semaphore.GetCounter();
+ if (counter < this_tick) {
+ return;
+ }
+ } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
+ std::memory_order_relaxed));
+}
+
+void MasterSemaphore::Wait(u64 tick) {
+ if (!semaphore) {
+ // If we don't support timeline semaphores, wait for the value normally
+ std::unique_lock lk{free_mutex};
+ free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; });
+ return;
+ }
+
+ // No need to wait if the GPU is ahead of the tick
+ if (IsFree(tick)) {
+ return;
+ }
+
+ // Update the GPU tick and try again
+ Refresh();
+
+ if (IsFree(tick)) {
+ return;
+ }
+
+ // If none of the above is hit, fallback to a regular wait
+ while (!semaphore.Wait(tick)) {
+ }
+
+ Refresh();
+}
+
+VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick) {
+ if (semaphore) {
+ return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+ } else {
+ return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+ }
+}
+
+static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+};
+
+VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
+ VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick) {
+ const VkSemaphore timeline_semaphore = *semaphore;
+
+ const u32 num_signal_semaphores = signal_semaphore ? 2 : 1;
+ const std::array signal_values{host_tick, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+
+ const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
+ const VkTimelineSemaphoreSubmitInfo timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = 0,
+ .pWaitSemaphoreValues = nullptr,
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = num_wait_semaphores,
+ .pWaitSemaphores = &wait_semaphore,
+ .pWaitDstStageMask = wait_stage_masks.data(),
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
+ };
+
+ return device.GetGraphicsQueue().Submit(submit_info);
+}
+
+VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick) {
+ const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
+ const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
+
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = nullptr,
+ .waitSemaphoreCount = num_wait_semaphores,
+ .pWaitSemaphores = &wait_semaphore,
+ .pWaitDstStageMask = wait_stage_masks.data(),
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = &signal_semaphore,
+ };
+
+ auto fence = GetFreeFence();
+ auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);
+
+ if (result == VK_SUCCESS) {
+ std::scoped_lock lock{wait_mutex};
+ wait_queue.emplace(host_tick, std::move(fence));
+ wait_cv.notify_one();
+ }
+
+ return result;
+}
+
+void MasterSemaphore::WaitThread(std::stop_token token) {
+ while (!token.stop_requested()) {
+ u64 host_tick;
+ vk::Fence fence;
+ {
+ std::unique_lock lock{wait_mutex};
+ Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
+ if (token.stop_requested()) {
+ return;
+ }
+ std::tie(host_tick, fence) = std::move(wait_queue.front());
+ wait_queue.pop();
+ }
+
+ fence.Wait();
+ fence.Reset();
+
+ {
+ std::scoped_lock lock{free_mutex};
+ free_queue.push_front(std::move(fence));
+ gpu_tick.store(host_tick);
+ }
+ free_cv.notify_one();
+ }
+}
+
+vk::Fence MasterSemaphore::GetFreeFence() {
+ std::scoped_lock lock{free_mutex};
+ if (free_queue.empty()) {
+ static constexpr VkFenceCreateInfo fence_ci{
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
+ return device.GetLogical().CreateFence(fence_ci);
+ }
+
+ auto fence = std::move(free_queue.back());
+ free_queue.pop_back();
+ return fence;
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 689f02ea5..3f599d7bd 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -4,7 +4,11 @@
#pragma once
#include <atomic>
+#include <condition_variable>
+#include <deque>
+#include <mutex>
#include <thread>
+#include <queue>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
@@ -15,6 +19,8 @@ namespace Vulkan {
class Device;
class MasterSemaphore {
+ using Waitable = std::pair<u64, vk::Fence>;
+
public:
explicit MasterSemaphore(const Device& device);
~MasterSemaphore();
@@ -29,11 +35,6 @@ public:
return gpu_tick.load(std::memory_order_acquire);
}
- /// Returns the timeline semaphore handle.
- [[nodiscard]] VkSemaphore Handle() const noexcept {
- return *semaphore;
- }
-
/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;
@@ -45,41 +46,38 @@ public:
}
/// Refresh the known GPU tick
- void Refresh() {
- u64 this_tick{};
- u64 counter{};
- do {
- this_tick = gpu_tick.load(std::memory_order_acquire);
- counter = semaphore.GetCounter();
- if (counter < this_tick) {
- return;
- }
- } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
- std::memory_order_relaxed));
- }
+ void Refresh();
/// Waits for a tick to be hit on the GPU
- void Wait(u64 tick) {
- // No need to wait if the GPU is ahead of the tick
- if (IsFree(tick)) {
- return;
- }
- // Update the GPU tick and try again
- Refresh();
- if (IsFree(tick)) {
- return;
- }
- // If none of the above is hit, fallback to a regular wait
- while (!semaphore.Wait(tick)) {
- }
- Refresh();
- }
+ void Wait(u64 tick);
+
+ /// Submits the device graphics queue, updating the tick as necessary
+ VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick);
+
+private:
+ VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick);
+ VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick);
+
+ void WaitThread(std::stop_token token);
+
+ vk::Fence GetFreeFence();
private:
+ const Device& device; ///< Device.
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
+ std::mutex wait_mutex;
+ std::mutex free_mutex;
+ std::condition_variable free_cv;
+ std::condition_variable_any wait_cv;
+ std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread.
+ std::deque<vk::Fence> free_queue; ///< Holds available fences for submission.
std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
+ std::jthread wait_thread; ///< Helper thread that waits for submitted fences.
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7e69b11d8..9f316113c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -114,14 +114,16 @@ Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribut
return Shader::AttributeType::Disabled;
case Maxwell::VertexAttribute::Type::SNorm:
case Maxwell::VertexAttribute::Type::UNorm:
- case Maxwell::VertexAttribute::Type::UScaled:
- case Maxwell::VertexAttribute::Type::SScaled:
case Maxwell::VertexAttribute::Type::Float:
return Shader::AttributeType::Float;
case Maxwell::VertexAttribute::Type::SInt:
return Shader::AttributeType::SignedInt;
case Maxwell::VertexAttribute::Type::UInt:
return Shader::AttributeType::UnsignedInt;
+ case Maxwell::VertexAttribute::Type::UScaled:
+ return Shader::AttributeType::UnsignedScaled;
+ case Maxwell::VertexAttribute::Type::SScaled:
+ return Shader::AttributeType::SignedScaled;
}
return Shader::AttributeType::Float;
}
@@ -165,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
}
@@ -212,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
break;
@@ -277,23 +285,26 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
- UpdateDescriptorQueue& update_descriptor_queue_,
+ GuestDescriptorQueue& guest_descriptor_queue_,
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
: VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
- descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
+ descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
- workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
+ workers(device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY
+ ? 1
+ : (std::max(std::thread::hardware_concurrency(), 2U) - 1),
+ "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const VkDriverId driver_id{device.GetDriverID()};
profile = Shader::Profile{
.supported_spirv = device.SupportedSpirvVersion(),
.unified_descriptor_binding = true,
- .support_descriptor_aliasing = true,
+ .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
.support_int8 = device.IsInt8Supported(),
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
@@ -324,11 +335,17 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_derivative_control = true,
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_native_ndc = device.IsExtDepthClipControlSupported(),
+ .support_scaled_attributes = !device.MustEmulateScaledFormats(),
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
.lower_left_origin_mode = false,
.need_declared_frag_colors = false,
+ .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
+ driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+ driver_id == VK_DRIVER_ID_MESA_RADV ||
+ driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
+ driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA,
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
@@ -336,16 +353,18 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.has_broken_signed_operations = false,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
.ignore_nan_fp_comparisons = false,
- };
+ .has_broken_spirv_subgroup_mask_vector_extract_dynamic =
+ driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
host_info = Shader::HostTranslateInfo{
+ .support_float64 = device.IsFloat64Supported(),
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
.support_snorm_render_buffer = true,
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
- .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
+ .support_conditional_barrier = device.SupportsConditionalBarriers(),
};
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
@@ -639,7 +658,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>(
scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
- descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
+ descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos);
} catch (const Shader::Exception& exception) {
@@ -692,6 +711,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
+ if (device.HasBrokenCompute()) {
+ LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
+ return nullptr;
+ }
+
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@@ -711,7 +735,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool,
- update_descriptor_queue, thread_worker, statistics,
+ guest_descriptor_queue, thread_worker, statistics,
&shader_notify, program.info, std::move(spv_module));
} catch (const Shader::Exception& exception) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 5171912d7..e323ea0fd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -82,7 +82,6 @@ class PipelineStatistics;
class RasterizerVulkan;
class RenderPassCache;
class Scheduler;
-class UpdateDescriptorQueue;
using VideoCommon::ShaderInfo;
@@ -93,16 +92,16 @@ struct ShaderPools {
inst.ReleaseContents();
}
- Shader::ObjectPool<Shader::IR::Inst> inst;
- Shader::ObjectPool<Shader::IR::Block> block;
- Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+ Shader::ObjectPool<Shader::IR::Inst> inst{8192};
+ Shader::ObjectPool<Shader::IR::Block> block{32};
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block{32};
};
class PipelineCache : public VideoCommon::ShaderCache {
public:
explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue,
+ GuestDescriptorQueue& guest_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
~PipelineCache();
@@ -144,7 +143,7 @@ private:
const Device& device;
Scheduler& scheduler;
DescriptorPool& descriptor_pool;
- UpdateDescriptorQueue& update_descriptor_queue;
+ GuestDescriptorQueue& guest_descriptor_queue;
RenderPassCache& render_pass_cache;
BufferCache& buffer_cache;
TextureCache& texture_cache;
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
new file mode 100644
index 000000000..d681bd22a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -0,0 +1,491 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/microprofile.h"
+#include "common/settings.h"
+#include "common/thread.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/renderer_vulkan/vk_present_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+
+namespace Vulkan {
+
+MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192));
+
+namespace {
+
+bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) {
+ const VkFormatProperties props{physical_device.GetFormatProperties(format)};
+ return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT);
+}
+
+[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() {
+ return VkImageSubresourceLayers{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = 0,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ };
+}
+
+[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
+ s32 swapchain_height) {
+ return VkImageBlit{
+ .srcSubresource = MakeImageSubresourceLayers(),
+ .srcOffsets =
+ {
+ {
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ },
+ {
+ .x = frame_width,
+ .y = frame_height,
+ .z = 1,
+ },
+ },
+ .dstSubresource = MakeImageSubresourceLayers(),
+ .dstOffsets =
+ {
+ {
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ },
+ {
+ .x = swapchain_width,
+ .y = swapchain_height,
+ .z = 1,
+ },
+ },
+ };
+}
+
+[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
+ u32 swapchain_height) {
+ return VkImageCopy{
+ .srcSubresource = MakeImageSubresourceLayers(),
+ .srcOffset =
+ {
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ },
+ .dstSubresource = MakeImageSubresourceLayers(),
+ .dstOffset =
+ {
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ },
+ .extent =
+ {
+ .width = std::min(frame_width, swapchain_width),
+ .height = std::min(frame_height, swapchain_height),
+ .depth = 1,
+ },
+ };
+}
+
+} // Anonymous namespace
+
+PresentManager::PresentManager(const vk::Instance& instance_,
+ Core::Frontend::EmuWindow& render_window_, const Device& device_,
+ MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
+ Swapchain& swapchain_, vk::SurfaceKHR& surface_)
+ : instance{instance_}, render_window{render_window_}, device{device_},
+ memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
+ surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(),
+ swapchain.GetImageViewFormat())},
+ use_present_thread{Settings::values.async_presentation.GetValue()},
+ image_count{swapchain.GetImageCount()}, last_render_surface{
+ render_window_.GetWindowInfo().render_surface} {
+
+ auto& dld = device.GetLogical();
+ cmdpool = dld.CreateCommandPool({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags =
+ VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = device.GetGraphicsFamily(),
+ });
+ auto cmdbuffers = cmdpool.Allocate(image_count);
+
+ frames.resize(image_count);
+ for (u32 i = 0; i < frames.size(); i++) {
+ Frame& frame = frames[i];
+ frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()};
+ frame.render_ready = dld.CreateSemaphore({
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ });
+ frame.present_done = dld.CreateFence({
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ });
+ free_queue.push(&frame);
+ }
+
+ if (use_present_thread) {
+ present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
+ }
+}
+
+PresentManager::~PresentManager() = default;
+
+Frame* PresentManager::GetRenderFrame() {
+ MICROPROFILE_SCOPE(Vulkan_WaitPresent);
+
+ // Wait for free presentation frames
+ std::unique_lock lock{free_mutex};
+ free_cv.wait(lock, [this] { return !free_queue.empty(); });
+
+ // Take the frame from the queue
+ Frame* frame = free_queue.front();
+ free_queue.pop();
+
+ // Wait for the presentation to be finished so all frame resources are free
+ frame->present_done.Wait();
+ frame->present_done.Reset();
+
+ return frame;
+}
+
+void PresentManager::Present(Frame* frame) {
+ if (!use_present_thread) {
+ scheduler.WaitWorker();
+ CopyToSwapchain(frame);
+ free_queue.push(frame);
+ return;
+ }
+
+ scheduler.Record([this, frame](vk::CommandBuffer) {
+ std::unique_lock lock{queue_mutex};
+ present_queue.push(frame);
+ frame_cv.notify_one();
+ });
+}
+
+void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
+ VkFormat image_view_format, VkRenderPass rd) {
+ auto& dld = device.GetLogical();
+
+ frame->width = width;
+ frame->height = height;
+ frame->is_srgb = is_srgb;
+
+ frame->image = memory_allocator.CreateImage({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = swapchain.GetImageFormat(),
+ .extent =
+ {
+ .width = width,
+ .height = height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ });
+
+ frame->image_view = dld.CreateImageView({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = *frame->image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image_view_format,
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ });
+
+ const VkImageView image_view{*frame->image_view};
+ frame->framebuffer = dld.CreateFramebuffer({
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .renderPass = rd,
+ .attachmentCount = 1,
+ .pAttachments = &image_view,
+ .width = width,
+ .height = height,
+ .layers = 1,
+ });
+}
+
+void PresentManager::WaitPresent() {
+ if (!use_present_thread) {
+ return;
+ }
+
+ // Wait for the present queue to be empty
+ {
+ std::unique_lock queue_lock{queue_mutex};
+ frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
+ }
+
+ // The above condition will be satisfied when the last frame is taken from the queue.
+ // To ensure that frame has been presented as well take hold of the swapchain
+ // mutex.
+ std::scoped_lock swapchain_lock{swapchain_mutex};
+}
+
+void PresentManager::PresentThread(std::stop_token token) {
+ Common::SetCurrentThreadName("VulkanPresent");
+ while (!token.stop_requested()) {
+ std::unique_lock lock{queue_mutex};
+
+ // Wait for presentation frames
+ Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
+ if (token.stop_requested()) {
+ return;
+ }
+
+ // Take the frame and notify anyone waiting
+ Frame* frame = present_queue.front();
+ present_queue.pop();
+ frame_cv.notify_one();
+
+ // By exchanging the lock ownership we take the swapchain lock
+ // before the queue lock goes out of scope. This way the swapchain
+ // lock in WaitPresent is guaranteed to occur after here.
+ std::exchange(lock, std::unique_lock{swapchain_mutex});
+
+ CopyToSwapchain(frame);
+
+ // Free the frame for reuse
+ std::scoped_lock fl{free_mutex};
+ free_queue.push(frame);
+ free_cv.notify_one();
+ }
+}
+
+void PresentManager::NotifySurfaceChanged() {
+#ifdef ANDROID
+ std::scoped_lock lock{recreate_surface_mutex};
+ recreate_surface_cv.notify_one();
+#endif
+}
+
+void PresentManager::CopyToSwapchain(Frame* frame) {
+ MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
+
+ const auto recreate_swapchain = [&] {
+ swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
+ image_count = swapchain.GetImageCount();
+ };
+
+#ifdef ANDROID
+ std::unique_lock lock{recreate_surface_mutex};
+
+ const auto needs_recreation = [&] {
+ if (last_render_surface != render_window.GetWindowInfo().render_surface) {
+ return true;
+ }
+ if (swapchain.NeedsRecreation(frame->is_srgb)) {
+ return true;
+ }
+ return false;
+ };
+
+ recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400),
+ [&]() { return !needs_recreation(); });
+
+ // If the frontend recreated the surface, recreate the renderer surface and swapchain.
+ if (last_render_surface != render_window.GetWindowInfo().render_surface) {
+ last_render_surface = render_window.GetWindowInfo().render_surface;
+ surface = CreateSurface(instance, render_window.GetWindowInfo());
+ recreate_swapchain();
+ }
+#endif
+
+ // If the size or colorspace of the incoming frames has changed, recreate the swapchain
+ // to account for that.
+ const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
+ const bool size_changed =
+ swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
+ if (srgb_changed || size_changed) {
+ recreate_swapchain();
+ }
+
+ while (swapchain.AcquireNextImage()) {
+ recreate_swapchain();
+ }
+
+ const vk::CommandBuffer cmdbuf{frame->cmdbuf};
+ cmdbuf.Begin({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .pInheritanceInfo = nullptr,
+ });
+
+ const VkImage image{swapchain.CurrentImage()};
+ const VkExtent2D extent = swapchain.GetExtent();
+ const std::array pre_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = *frame->image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ },
+ };
+ const std::array post_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = *frame->image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ },
+ };
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
+ {}, {}, pre_barriers);
+
+ if (blit_supported) {
+ cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
+ VK_FILTER_LINEAR);
+ } else {
+ cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
+ }
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {},
+ {}, {}, post_barriers);
+
+ cmdbuf.End();
+
+ const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
+ const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore();
+ const std::array wait_semaphores = {present_semaphore, *frame->render_ready};
+
+ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ };
+
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = nullptr,
+ .waitSemaphoreCount = 2U,
+ .pWaitSemaphores = wait_semaphores.data(),
+ .pWaitDstStageMask = wait_stage_masks.data(),
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = 1U,
+ .pSignalSemaphores = &render_semaphore,
+ };
+
+ // Submit the image copy/blit to the swapchain
+ {
+ std::scoped_lock submit_lock{scheduler.submit_mutex};
+ switch (const VkResult result =
+ device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
+ case VK_SUCCESS:
+ break;
+ case VK_ERROR_DEVICE_LOST:
+ device.ReportLoss();
+ [[fallthrough]];
+ default:
+ vk::Check(result);
+ break;
+ }
+ }
+
+ // Present
+ swapchain.Present(render_semaphore);
+}
+
+} // namespace Vulkan \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
new file mode 100644
index 000000000..83e859416
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -0,0 +1,91 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+
+#include "common/common_types.h"
+#include "common/polyfill_thread.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+} // namespace Core::Frontend
+
+namespace Vulkan {
+
+class Device;
+class Scheduler;
+class Swapchain;
+
+struct Frame {
+ u32 width;
+ u32 height;
+ bool is_srgb;
+ vk::Image image;
+ vk::ImageView image_view;
+ vk::Framebuffer framebuffer;
+ vk::CommandBuffer cmdbuf;
+ vk::Semaphore render_ready;
+ vk::Fence present_done;
+};
+
+class PresentManager {
+public:
+ PresentManager(const vk::Instance& instance, Core::Frontend::EmuWindow& render_window,
+ const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler,
+ Swapchain& swapchain, vk::SurfaceKHR& surface);
+ ~PresentManager();
+
+ /// Returns the last used presentation frame
+ Frame* GetRenderFrame();
+
+ /// Pushes a frame for presentation
+ void Present(Frame* frame);
+
+ /// Recreates the present frame to match the provided parameters
+ void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
+ VkFormat image_view_format, VkRenderPass rd);
+
+ /// Waits for the present thread to finish presenting all queued frames.
+ void WaitPresent();
+
+ /// This is called to notify the rendering backend of a surface change
+ void NotifySurfaceChanged();
+
+private:
+ void PresentThread(std::stop_token token);
+
+ void CopyToSwapchain(Frame* frame);
+
+private:
+ const vk::Instance& instance;
+ Core::Frontend::EmuWindow& render_window;
+ const Device& device;
+ MemoryAllocator& memory_allocator;
+ Scheduler& scheduler;
+ Swapchain& swapchain;
+ vk::SurfaceKHR& surface;
+ vk::CommandPool cmdpool;
+ std::vector<Frame> frames;
+ std::queue<Frame*> present_queue;
+ std::queue<Frame*> free_queue;
+ std::condition_variable_any frame_cv;
+ std::condition_variable free_cv;
+ std::condition_variable recreate_surface_cv;
+ std::mutex swapchain_mutex;
+ std::mutex recreate_surface_mutex;
+ std::mutex queue_mutex;
+ std::mutex free_mutex;
+ std::jthread present_thread;
+ bool blit_supported;
+ bool use_present_thread;
+ std::size_t image_count{};
+ void* last_render_surface{};
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 929c8ece6..d67490449 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
}
}
-QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
+ Core::Memory::Memory& cpu_memory_, const Device& device_,
Scheduler& scheduler_)
- : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
+ : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
@@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
const vk::Device* logical = &cache.GetDevice().GetLogical();
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
+ const bool use_precise = Settings::IsGPULevelHigh();
logical->ResetQueryPool(query.first, query.second, 1);
- cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
+ cmdbuf.BeginQuery(query.first, query.second,
+ use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
});
}
@@ -112,8 +115,10 @@ void HostCounter::EndQuery() {
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
-u64 HostCounter::BlockingQuery() const {
- cache.GetScheduler().Wait(tick);
+u64 HostCounter::BlockingQuery(bool async) const {
+ if (!async) {
+ cache.GetScheduler().Wait(tick);
+ }
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 26762ee09..c1b9552eb 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,7 +52,8 @@ private:
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+ explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
+ Core::Memory::Memory& cpu_memory_, const Device& device_,
Scheduler& scheduler_);
~QueryCache();
@@ -83,7 +84,7 @@ public:
void EndQuery();
private:
- u64 BlockingQuery() const override;
+ u64 BlockingQuery(bool async = false) const override;
QueryCache& cache;
const VideoCore::QueryType type;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 86ef0daeb..84e3a30cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -160,19 +160,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
- update_descriptor_queue(device, scheduler),
- blit_image(device, scheduler, state_tracker, descriptor_pool),
- render_pass_cache(device), texture_cache_runtime{device, scheduler,
- memory_allocator, staging_pool,
- blit_image, render_pass_cache,
- descriptor_pool, update_descriptor_queue},
+ guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
+ blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
+ texture_cache_runtime{
+ device, scheduler, memory_allocator, staging_pool,
+ blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue},
texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
- update_descriptor_queue, descriptor_pool),
+ guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
- pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
+ pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
- query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
+ query_cache{*this, cpu_memory_, device, scheduler},
+ accelerate_dma(buffer_cache, texture_cache, scheduler),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@@ -188,7 +188,14 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache.UpdateCounters();
+ }
+#else
query_cache.UpdateCounters();
+#endif
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
@@ -272,7 +279,14 @@ void RasterizerVulkan::DrawTexture() {
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache.UpdateCounters();
+ }
+#else
query_cache.UpdateCounters();
+#endif
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
@@ -348,25 +362,12 @@ void RasterizerVulkan::Clear(u32 layer_count) {
const u32 color_attachment = regs.clear_surface.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
- VkClearValue clear_value;
- bool is_integer = false;
- bool is_signed = false;
- size_t int_size = 8;
- for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) {
- const auto& this_rt = regs.rt[i];
- if (this_rt.Address() == 0) {
- continue;
- }
- if (this_rt.format == Tegra::RenderTargetFormat::NONE) {
- continue;
- }
- const auto format =
- VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format);
- is_integer = IsPixelFormatInteger(format);
- is_signed = IsPixelFormatSignedInteger(format);
- int_size = PixelComponentSizeBitsInteger(format);
- break;
- }
+ const auto format =
+ VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
+ bool is_integer = IsPixelFormatInteger(format);
+ bool is_signed = IsPixelFormatSignedInteger(format);
+ size_t int_size = PixelComponentSizeBitsInteger(format);
+ VkClearValue clear_value{};
if (!is_integer) {
std::memcpy(clear_value.color.float32, regs.clear_color.data(),
regs.clear_color.size() * sizeof(f32));
@@ -394,7 +395,15 @@ void RasterizerVulkan::Clear(u32 layer_count) {
cmdbuf.ClearAttachments(attachment, clear_rect);
});
} else {
- UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel");
+ u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 |
+ regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
+ Region2D dst_region = {
+ Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
+ Offset2D{.x = clear_rect.rect.offset.x +
+ static_cast<s32>(clear_rect.rect.extent.width),
+ .y = clear_rect.rect.offset.y +
+ static_cast<s32>(clear_rect.rect.extent.height)}};
+ blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region);
}
}
@@ -493,6 +502,22 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
return false;
}
+VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ auto area = texture_cache.GetFlushArea(addr, size);
+ if (area) {
+ return *area;
+ }
+ }
+ VideoCore::RasterizerDownloadArea new_area{
+ .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
+ .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE),
+ .preemtive = true,
+ };
+ return new_area;
+}
+
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
@@ -589,7 +614,7 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::SignalReference() {
- fence_manager.SignalOrdering();
+ fence_manager.SignalReference();
}
void RasterizerVulkan::ReleaseFences() {
@@ -622,7 +647,7 @@ void RasterizerVulkan::WaitForIdle() {
cmdbuf.SetEvent(event, flags);
cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
});
- SignalReference();
+ fence_manager.SignalOrdering();
}
void RasterizerVulkan::FragmentBarrier() {
@@ -644,7 +669,8 @@ void RasterizerVulkan::FlushCommands() {
void RasterizerVulkan::TickFrame() {
draw_counter = 0;
- update_descriptor_queue.TickFrame();
+ guest_descriptor_queue.TickFrame();
+ compute_pass_descriptor_queue.TickFrame();
fence_manager.TickFrame();
staging_pool.TickFrame();
{
@@ -663,11 +689,12 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
// TODO(Blinkhawk): Reimplement Host conditional rendering.
return false;
}
- // Medium / Low Hack: stub any checks on queries writen into the buffer cache.
+ // Medium / Low Hack: stub any checks on queries written into the buffer cache.
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
- VideoCommon::CacheType::BufferCache)) {
+ VideoCommon::CacheType::BufferCache |
+ VideoCommon::CacheType::QueryCache)) {
return true;
}
return false;
@@ -730,7 +757,11 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load
}
void RasterizerVulkan::FlushWork() {
+#ifdef ANDROID
+ static constexpr u32 DRAWS_TO_DISPATCH = 1024;
+#else
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
+#endif // ANDROID
// Only check multiples of 8 draws
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
@@ -748,7 +779,9 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_,
+ Scheduler& scheduler_)
+ : buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, scheduler{scheduler_} {}
bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
std::scoped_lock lock{buffer_cache.mutex};
@@ -760,6 +793,46 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
return buffer_cache.DMACopy(src_address, dest_address, amount);
}
+template <bool IS_IMAGE_UPLOAD>
+bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::BufferOperand& buffer_operand,
+ const Tegra::DMA::ImageOperand& image_operand) {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ const auto image_id = texture_cache.DmaImageId(image_operand, IS_IMAGE_UPLOAD);
+ if (image_id == VideoCommon::NULL_IMAGE_ID) {
+ return false;
+ }
+ const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
+ static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
+ const auto [buffer, offset] =
+ buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
+
+ const auto [image, copy] = texture_cache.DmaBufferImageCopy(
+ copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD);
+ const std::span copy_span{&copy, 1};
+
+ if constexpr (IS_IMAGE_UPLOAD) {
+ image->UploadMemory(buffer->Handle(), offset, copy_span);
+ } else {
+ texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
+ buffer_operand.address, buffer_size);
+ }
+ return true;
+}
+
+bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::ImageOperand& image_operand,
+ const Tegra::DMA::BufferOperand& buffer_operand) {
+ return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand);
+}
+
+bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::BufferOperand& buffer_operand,
+ const Tegra::DMA::ImageOperand& image_operand) {
+ return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand);
+}
+
void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = maxwell3d->regs;
UpdateViewportsState(regs);
@@ -1056,7 +1129,7 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
enabled = false;
}
- scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index a0508b57c..b39710b3c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -45,14 +45,28 @@ class StateTracker;
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
public:
- explicit AccelerateDMA(BufferCache& buffer_cache);
+ explicit AccelerateDMA(BufferCache& buffer_cache, TextureCache& texture_cache,
+ Scheduler& scheduler);
bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+ bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src,
+ const Tegra::DMA::BufferOperand& dst) override;
+
+ bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src,
+ const Tegra::DMA::ImageOperand& dst) override;
+
private:
+ template <bool IS_IMAGE_UPLOAD>
+ bool DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::BufferOperand& src,
+ const Tegra::DMA::ImageOperand& dst);
+
BufferCache& buffer_cache;
+ TextureCache& texture_cache;
+ Scheduler& scheduler;
};
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
@@ -78,6 +92,7 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
bool MustFlushRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
@@ -169,7 +184,8 @@ private:
StagingBufferPool staging_pool;
DescriptorPool descriptor_pool;
- UpdateDescriptorQueue update_descriptor_queue;
+ GuestDescriptorQueue guest_descriptor_queue;
+ ComputePassDescriptorQueue compute_pass_descriptor_queue;
BlitImageHelper blit_image;
RenderPassCache render_pass_cache;
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index 6c8ac22f4..6572f82ba 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -37,7 +37,7 @@ size_t ResourcePool::CommitResource() {
found = free_resource;
}
}
- // Free iterator is hinted to the resource after the one that's been commited.
+ // Free iterator is hinted to the resource after the one that's been committed.
hint_iterator = (*found + 1) % ticks.size();
return *found;
}
@@ -46,7 +46,7 @@ size_t ResourcePool::ManageOverflow() {
const size_t old_capacity = ticks.size();
Grow();
- // The last entry is guaranted to be free, since it's the first element of the freshly
+ // The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index e03685af1..17ef61147 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -46,15 +46,17 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
Scheduler::~Scheduler() = default;
-void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
- SubmitExecution(signal_semaphore, wait_semaphore);
+u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ // When flushing, we only send data to the worker thread; no waiting is necessary.
+ const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
+ return signal_value;
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ // When finishing, we need to wait for the submission to have executed on the device.
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal_semaphore, wait_semaphore);
- WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
}
@@ -63,8 +65,14 @@ void Scheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
- std::unique_lock lock{work_mutex};
- wait_cv.wait(lock, [this] { return work_queue.empty(); });
+ // Ensure the queue is drained.
+ {
+ std::unique_lock ql{queue_mutex};
+ event_cv.wait(ql, [this] { return work_queue.empty(); });
+ }
+
+ // Now wait for execution to finish.
+ std::scoped_lock el{execution_mutex};
}
void Scheduler::DispatchWork() {
@@ -72,10 +80,10 @@ void Scheduler::DispatchWork() {
return;
}
{
- std::scoped_lock lock{work_mutex};
+ std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
- work_cv.notify_one();
+ event_cv.notify_all();
AcquireNewChunk();
}
@@ -137,30 +145,55 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) {
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("VulkanWorker");
- do {
+
+ const auto TryPopQueue{[this](auto& work) -> bool {
+ if (work_queue.empty()) {
+ return false;
+ }
+
+ work = std::move(work_queue.front());
+ work_queue.pop();
+ event_cv.notify_all();
+ return true;
+ }};
+
+ while (!stop_token.stop_requested()) {
std::unique_ptr<CommandChunk> work;
- bool has_submit{false};
+
{
- std::unique_lock lock{work_mutex};
- if (work_queue.empty()) {
- wait_cv.notify_all();
- }
- Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); });
+ std::unique_lock lk{queue_mutex};
+
+ // Wait for work.
+ Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); });
+
+ // If we've been asked to stop, we're done.
if (stop_token.stop_requested()) {
- continue;
+ return;
}
- work = std::move(work_queue.front());
- work_queue.pop();
- has_submit = work->HasSubmit();
+ // Exchange lock ownership so that we take the execution lock before
+ // the queue lock goes out of scope. This allows us to force execution
+ // to complete in the next step.
+ std::exchange(lk, std::unique_lock{execution_mutex});
+
+ // Perform the work, tracking whether the chunk was a submission
+ // before executing.
+ const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
+
+ // If the chunk was a submission, reallocate the command buffer.
+ if (has_submit) {
+ AllocateWorkerCommandBuffer();
+ }
}
- if (has_submit) {
- AllocateWorkerCommandBuffer();
+
+ {
+ std::scoped_lock rl{reserve_mutex};
+
+ // Recycle the chunk back to the reserve.
+ chunk_reserve.emplace_back(std::move(work));
}
- std::scoped_lock reserve_lock{reserve_mutex};
- chunk_reserve.push_back(std::move(work));
- } while (!stop_token.stop_requested());
+ }
}
void Scheduler::AllocateWorkerCommandBuffer() {
@@ -173,52 +206,21 @@ void Scheduler::AllocateWorkerCommandBuffer() {
});
}
-void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
- const VkSemaphore timeline_semaphore = master_semaphore->Handle();
-
- const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
- const std::array signal_values{signal_value, u64(0)};
- const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
-
- const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
- const std::array wait_values{signal_value - 1, u64(1)};
- const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
- static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- };
-
- const VkTimelineSemaphoreSubmitInfo timeline_si{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pNext = nullptr,
- .waitSemaphoreValueCount = num_wait_semaphores,
- .pWaitSemaphoreValues = wait_values.data(),
- .signalSemaphoreValueCount = num_signal_semaphores,
- .pSignalSemaphoreValues = signal_values.data(),
- };
- const VkSubmitInfo submit_info{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &timeline_si,
- .waitSemaphoreCount = num_wait_semaphores,
- .pWaitSemaphores = wait_semaphores.data(),
- .pWaitDstStageMask = wait_stage_masks.data(),
- .commandBufferCount = 1,
- .pCommandBuffers = cmdbuf.address(),
- .signalSemaphoreCount = num_signal_semaphores,
- .pSignalSemaphores = signal_semaphores.data(),
- };
if (on_submit) {
on_submit();
}
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+ std::scoped_lock lock{submit_mutex};
+ switch (const VkResult result = master_semaphore->SubmitQueue(
+ cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
@@ -231,12 +233,20 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
});
chunk->MarkSubmit();
DispatchWork();
+ return signal_value;
}
void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache->UpdateCounters();
+ }
+#else
query_cache->UpdateCounters();
+#endif
}
}
@@ -247,7 +257,14 @@ void Scheduler::InvalidateState() {
}
void Scheduler::EndPendingOperations() {
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache->DisableStreams();
+ }
+#else
query_cache->DisableStreams();
+#endif
EndRenderPass();
}
@@ -289,13 +306,16 @@ void Scheduler::EndRenderPass() {
}
void Scheduler::AcquireNewChunk() {
- std::scoped_lock lock{reserve_mutex};
+ std::scoped_lock rl{reserve_mutex};
+
if (chunk_reserve.empty()) {
+ // If we don't have anything reserved, we need to make a new chunk.
chunk = std::make_unique<CommandChunk>();
- return;
+ } else {
+ // Otherwise, we can just take from the reserve.
+ chunk = std::move(chunk_reserve.back());
+ chunk_reserve.pop_back();
}
- chunk = std::move(chunk_reserve.back());
- chunk_reserve.pop_back();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bd4cb0f7e..475c682eb 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -34,7 +34,7 @@ public:
~Scheduler();
/// Sends the current execution context to the GPU.
- void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
+ u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@@ -106,6 +106,8 @@ public:
return *master_semaphore;
}
+ std::mutex submit_mutex;
+
private:
class Command {
public:
@@ -201,7 +203,7 @@ private:
void AllocateWorkerCommandBuffer();
- void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
+ u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();
@@ -232,10 +234,10 @@ private:
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+ std::mutex execution_mutex;
std::mutex reserve_mutex;
- std::mutex work_mutex;
- std::condition_variable_any work_cv;
- std::condition_variable wait_cv;
+ std::mutex queue_mutex;
+ std::condition_variable_any event_cv;
std::jthread worker_thread;
};
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index 8eb735489..5efd7d66e 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -25,9 +25,7 @@ namespace {
#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0])))
-std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
- MemoryAllocator& allocator,
- VkExtent2D dimensions, VkFormat format) {
+vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) {
const VkImageCreateInfo image_ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
@@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
-
- auto image = device.GetLogical().CreateImage(image_ci);
- auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal);
-
- return std::make_pair(std::move(image), std::move(commit));
+ return allocator.CreateImage(image_ci);
}
void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
@@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
vk::Image& image, VkExtent2D dimensions, VkFormat format,
std::span<const u8> initial_contents = {}) {
- auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo upload_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload);
- std::ranges::copy(initial_contents, upload_commit.Map().begin());
+ };
+ auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload);
+ std::ranges::copy(initial_contents, upload_buffer.Mapped().begin());
+ upload_buffer.Flush();
const std::array<VkBufferImageCopy, 1> regions{{{
.bufferOffset = 0,
@@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
});
scheduler.Finish();
-
- // This should go out of scope before the commit
- auto upload_buffer2 = std::move(upload_buffer);
}
vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) {
@@ -468,7 +460,7 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>
}
void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) {
- constexpr std::array<VkImageSubresourceRange, 1> subresources{{{
+ static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
@@ -528,13 +520,11 @@ SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count,
}
void SMAA::CreateImages() {
- constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
- constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
+ static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
+ static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
- std::tie(m_static_images[Area], m_static_buffer_commits[Area]) =
- CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
- std::tie(m_static_images[Search], m_static_buffer_commits[Search]) =
- CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM);
+ m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
+ m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_image_views[Area] =
CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
@@ -544,12 +534,11 @@ void SMAA::CreateImages() {
for (u32 i = 0; i < m_image_count; i++) {
Images& images = m_dynamic_images.emplace_back();
- std::tie(images.images[Blend], images.buffer_commits[Blend]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
- std::tie(images.images[Edges], images.buffer_commits[Edges]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
- std::tie(images.images[Output], images.buffer_commits[Output]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+ images.images[Blend] =
+ CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+ images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
+ images.images[Output] =
+ CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Blend] =
CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);
@@ -586,12 +575,12 @@ void SMAA::CreateSampler() {
void SMAA::CreateShaders() {
// These match the order of the SMAAStage enum
- constexpr std::array vert_shader_sources{
+ static constexpr std::array vert_shader_sources{
ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV),
ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV),
ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV),
};
- constexpr std::array frag_shader_sources{
+ static constexpr std::array frag_shader_sources{
ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV),
ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV),
ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV),
@@ -675,8 +664,8 @@ void SMAA::UploadImages(Scheduler& scheduler) {
return;
}
- constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
- constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
+ static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
+ static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent,
VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes));
diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h
index 99a369148..0e214258a 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.h
+++ b/src/video_core/renderer_vulkan/vk_smaa.h
@@ -66,13 +66,11 @@ private:
std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{};
std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{};
- std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits;
std::array<vk::Image, MaxStaticImage> m_static_images{};
std::array<vk::ImageView, MaxStaticImage> m_static_image_views{};
struct Images {
vk::DescriptorSets descriptor_sets{};
- std::array<MemoryCommit, MaxDynamicImage> buffer_commits;
std::array<vk::Image, MaxDynamicImage> images{};
std::array<vk::ImageView, MaxDynamicImage> image_views{};
std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{};
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 74ca77216..62b251a9b 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
-constexpr VkMemoryPropertyFlags HOST_FLAGS =
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
-
-bool IsStreamHeap(VkMemoryHeap heap) noexcept {
- return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
-}
-
-std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- VkMemoryPropertyFlags flags) noexcept {
- for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
- if (((type_mask >> type_index) & 1) == 0) {
- // Memory type is incompatible
- continue;
- }
- const VkMemoryType& memory_type = props.memoryTypes[type_index];
- if ((memory_type.propertyFlags & flags) != flags) {
- // Memory type doesn't have the flags we want
- continue;
- }
- if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
- // Memory heap is not suitable for streaming
- continue;
- }
- // Success!
- return type_index;
- }
- return std::nullopt;
-}
-
-u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- bool try_device_local) {
- std::optional<u32> type;
- if (try_device_local) {
- // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
- type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
- if (type) {
- return *type;
- }
- }
- // Otherwise try without the DEVICE_LOCAL_BIT
- type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
- if (type) {
- return *type;
- }
- // This should never happen, and in case it does, signal it as an out of memory situation
- throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-}
-
size_t Region(size_t iterator) noexcept {
return iterator / REGION_SIZE;
}
@@ -87,8 +38,7 @@ size_t Region(size_t iterator) noexcept {
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
- const vk::Device& dev = device.GetLogical();
- stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -99,46 +49,13 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- if (device.HasDebuggingToolAttached()) {
- stream_buffer.SetObjectNameEXT("Stream Buffer");
- }
- VkMemoryDedicatedRequirements dedicated_reqs{
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
- .pNext = nullptr,
- .prefersDedicatedAllocation = VK_FALSE,
- .requiresDedicatedAllocation = VK_FALSE,
- };
- const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
- const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
- dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
- const VkMemoryDedicatedAllocateInfo dedicated_info{
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = nullptr,
- .image = nullptr,
- .buffer = *stream_buffer,
};
- const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
- VkMemoryAllocateInfo stream_memory_info{
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = make_dedicated ? &dedicated_info : nullptr,
- .allocationSize = requirements.size,
- .memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
- };
- stream_memory = dev.TryAllocateMemory(stream_memory_info);
- if (!stream_memory) {
- LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
- stream_memory_info.memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
- stream_memory = dev.AllocateMemory(stream_memory_info);
- }
-
+ stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream);
if (device.HasDebuggingToolAttached()) {
- stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+ stream_buffer.SetObjectNameEXT("Stream Buffer");
}
- stream_buffer.BindMemory(*stream_memory, 0);
- stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+ stream_pointer = stream_buffer.Mapped();
+ ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
@@ -199,7 +116,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
- .mapped_span = std::span<u8>(stream_pointer + offset, size),
+ .mapped_span = stream_pointer.subspan(offset, size),
.usage{},
.log2_level{},
.index{},
@@ -247,7 +164,7 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
- vk::Buffer buffer = device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -259,17 +176,15 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage);
if (device.HasDebuggingToolAttached()) {
++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
}
- MemoryCommit commit = memory_allocator.Commit(buffer, usage);
- const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
-
+ const std::span<u8> mapped_span = buffer.Mapped();
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer),
- .commit = std::move(commit),
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 4fd15f11a..5f69f08b1 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -46,7 +46,6 @@ private:
struct StagingBuffer {
vk::Buffer buffer;
- MemoryCommit commit;
std::span<u8> mapped_span;
MemoryUsage usage;
u32 log2_level;
@@ -97,8 +96,7 @@ private:
Scheduler& scheduler;
vk::Buffer stream_buffer;
- vk::DeviceMemory stream_memory;
- u8* stream_pointer = nullptr;
+ std::span<u8> stream_pointer;
size_t iterator = 0;
size_t used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index b6810eef9..d3cddac69 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -14,6 +14,7 @@
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+#include "vulkan/vulkan_core.h"
namespace Vulkan {
@@ -33,23 +34,47 @@ VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats)
return found != formats.end() ? *found : formats[0];
}
-VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
- // Mailbox (triple buffering) doesn't lock the application like fifo (vsync),
- // prefer it if vsync option is not selected
- const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
- if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless &&
- found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) {
- return VK_PRESENT_MODE_MAILBOX_KHR;
- }
- if (!Settings::values.use_speed_limit.GetValue()) {
- // FIFO present mode locks the framerate to the monitor's refresh rate,
- // Find an alternative to surpass this limitation if FPS is unlocked.
- const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR);
- if (found_imm != modes.end()) {
- return VK_PRESENT_MODE_IMMEDIATE_KHR;
+static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox,
+ bool has_fifo_relaxed) {
+ // Mailbox doesn't lock the application like FIFO (vsync)
+ // FIFO present mode locks the framerate to the monitor's refresh rate
+ Settings::VSyncMode setting = [has_imm, has_mailbox]() {
+ // Choose Mailbox or Immediate if unlocked and those modes are supported
+ const auto mode = Settings::values.vsync_mode.GetValue();
+ if (Settings::values.use_speed_limit.GetValue()) {
+ return mode;
+ }
+ switch (mode) {
+ case Settings::VSyncMode::FIFO:
+ case Settings::VSyncMode::FIFORelaxed:
+ if (has_mailbox) {
+ return Settings::VSyncMode::Mailbox;
+ } else if (has_imm) {
+ return Settings::VSyncMode::Immediate;
+ }
+ [[fallthrough]];
+ default:
+ return mode;
}
+ }();
+ if ((setting == Settings::VSyncMode::Mailbox && !has_mailbox) ||
+ (setting == Settings::VSyncMode::Immediate && !has_imm) ||
+ (setting == Settings::VSyncMode::FIFORelaxed && !has_fifo_relaxed)) {
+ setting = Settings::VSyncMode::FIFO;
+ }
+
+ switch (setting) {
+ case Settings::VSyncMode::Immediate:
+ return VK_PRESENT_MODE_IMMEDIATE_KHR;
+ case Settings::VSyncMode::Mailbox:
+ return VK_PRESENT_MODE_MAILBOX_KHR;
+ case Settings::VSyncMode::FIFO:
+ return VK_PRESENT_MODE_FIFO_KHR;
+ case Settings::VSyncMode::FIFORelaxed:
+ return VK_PRESENT_MODE_FIFO_RELAXED_KHR;
+ default:
+ return VK_PRESENT_MODE_FIFO_KHR;
}
- return VK_PRESENT_MODE_FIFO_KHR;
}
VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
@@ -65,21 +90,34 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
return extent;
}
+VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) {
+ if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) {
+ return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+ } else if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
+ return VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR;
+ } else {
+ LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}",
+ capabilities.supportedCompositeAlpha);
+ return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+ }
+}
+
} // Anonymous namespace
Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,
u32 width_, u32 height_, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
- Create(width_, height_, srgb);
+ Create(surface_, width_, height_, srgb);
}
Swapchain::~Swapchain() = default;
-void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
+void Swapchain::Create(VkSurfaceKHR surface_, u32 width_, u32 height_, bool srgb) {
is_outdated = false;
is_suboptimal = false;
width = width_;
height = height_;
+ surface = surface_;
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
@@ -87,18 +125,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
return;
}
- device.GetLogical().WaitIdle();
Destroy();
CreateSwapchain(capabilities, srgb);
CreateSemaphores();
- CreateImageViews();
resource_ticks.clear();
resource_ticks.resize(image_count);
}
-void Swapchain::AcquireNextImage() {
+bool Swapchain::AcquireNextImage() {
const VkResult result = device.GetLogical().AcquireNextImageKHR(
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
VK_NULL_HANDLE, &image_index);
@@ -115,8 +151,11 @@ void Swapchain::AcquireNextImage() {
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
break;
}
+
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
+
+ return is_suboptimal || is_outdated;
}
void Swapchain::Present(VkSemaphore render_semaphore) {
@@ -131,6 +170,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
+ std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
@@ -153,13 +193,20 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) {
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
- const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
+ const auto present_modes = physical_device.GetSurfacePresentModesKHR(surface);
+ has_mailbox = std::find(present_modes.begin(), present_modes.end(),
+ VK_PRESENT_MODE_MAILBOX_KHR) != present_modes.end();
+ has_imm = std::find(present_modes.begin(), present_modes.end(),
+ VK_PRESENT_MODE_IMMEDIATE_KHR) != present_modes.end();
+ has_fifo_relaxed = std::find(present_modes.begin(), present_modes.end(),
+ VK_PRESENT_MODE_FIFO_RELAXED_KHR) != present_modes.end();
- const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
- present_mode = ChooseSwapPresentMode(present_modes);
+ const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
+ surface_format = ChooseSwapSurfaceFormat(formats);
+ present_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed);
u32 requested_image_count{capabilities.minImageCount + 1};
- // Ensure Tripple buffering if possible.
+ // Ensure Triple buffering if possible.
if (capabilities.maxImageCount > 0) {
if (requested_image_count > capabilities.maxImageCount) {
requested_image_count = capabilities.maxImageCount;
@@ -180,12 +227,17 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.imageColorSpace = surface_format.colorSpace,
.imageExtent = {},
.imageArrayLayers = 1,
- .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
+#ifdef ANDROID
+ // On Android, do not allow surface rotation to deviate from the frontend.
+ .preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+#else
.preTransform = capabilities.currentTransform,
- .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+#endif
+ .compositeAlpha = alpha_flags,
.presentMode = present_mode,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,
@@ -217,67 +269,35 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
extent = swapchain_ci.imageExtent;
current_srgb = srgb;
- current_fps_unlocked = !Settings::values.use_speed_limit.GetValue();
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
+#ifdef ANDROID
+ // Android is already ordered the same as Switch.
+ image_view_format = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
+#else
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
+#endif
}
void Swapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::ranges::generate(present_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
-}
-
-void Swapchain::CreateImageViews() {
- VkImageViewCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .image = {},
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image_view_format,
- .components =
- {
- .r = VK_COMPONENT_SWIZZLE_IDENTITY,
- .g = VK_COMPONENT_SWIZZLE_IDENTITY,
- .b = VK_COMPONENT_SWIZZLE_IDENTITY,
- .a = VK_COMPONENT_SWIZZLE_IDENTITY,
- },
- .subresourceRange =
- {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- };
-
- image_views.resize(image_count);
- for (std::size_t i = 0; i < image_count; i++) {
- ci.image = images[i];
- image_views[i] = device.GetLogical().CreateImageView(ci);
- }
+ render_semaphores.resize(image_count);
+ std::ranges::generate(render_semaphores,
+ [this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
- framebuffers.clear();
- image_views.clear();
swapchain.reset();
}
-bool Swapchain::HasFpsUnlockChanged() const {
- return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue();
-}
-
bool Swapchain::NeedsPresentModeUpdate() const {
- // Mailbox present mode is the ideal for all scenarios. If it is not available,
- // A different present mode is needed to support unlocked FPS above the monitor's refresh rate.
- return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged();
+ const auto requested_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed);
+ return present_mode != requested_mode;
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index caf1ff32b..b8a1465a6 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -24,10 +24,10 @@ public:
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
- void Create(u32 width, u32 height, bool srgb);
+ void Create(VkSurfaceKHR surface, u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
- void AcquireNextImage();
+ bool AcquireNextImage();
/// Presents the rendered image to the swapchain.
void Present(VkSemaphore render_semaphore);
@@ -52,6 +52,11 @@ public:
return is_suboptimal;
}
+ /// Returns true when the swapchain format is in the srgb color space
+ bool IsSrgb() const {
+ return current_srgb;
+ }
+
VkExtent2D GetSize() const {
return extent;
}
@@ -64,22 +69,34 @@ public:
return image_index;
}
+ std::size_t GetFrameIndex() const {
+ return frame_index;
+ }
+
VkImage GetImageIndex(std::size_t index) const {
return images[index];
}
- VkImageView GetImageViewIndex(std::size_t index) const {
- return *image_views[index];
+ VkImage CurrentImage() const {
+ return images[image_index];
}
VkFormat GetImageViewFormat() const {
return image_view_format;
}
+ VkFormat GetImageFormat() const {
+ return surface_format.format;
+ }
+
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
+ VkSemaphore CurrentRenderSemaphore() const {
+ return *render_semaphores[frame_index];
+ }
+
u32 GetWidth() const {
return width;
}
@@ -88,6 +105,10 @@ public:
return height;
}
+ VkExtent2D GetExtent() const {
+ return extent;
+ }
+
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
void CreateSemaphores();
@@ -95,11 +116,9 @@ private:
void Destroy();
- bool HasFpsUnlockChanged() const;
-
bool NeedsPresentModeUpdate() const;
- const VkSurfaceKHR surface;
+ VkSurfaceKHR surface;
const Device& device;
Scheduler& scheduler;
@@ -107,10 +126,9 @@ private:
std::size_t image_count{};
std::vector<VkImage> images;
- std::vector<vk::ImageView> image_views;
- std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
+ std::vector<vk::Semaphore> render_semaphores;
u32 width;
u32 height;
@@ -121,9 +139,12 @@ private:
VkFormat image_view_format{};
VkExtent2D extent{};
VkPresentModeKHR present_mode{};
+ VkSurfaceFormatKHR surface_format{};
+ bool has_imm{false};
+ bool has_mailbox{false};
+ bool has_fifo_relaxed{false};
bool current_srgb{};
- bool current_fps_unlocked{};
bool is_outdated{};
bool is_suboptimal{};
};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index d39372ec4..ce6acc30c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1,10 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <array>
#include <span>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include "common/bit_cast.h"
#include "common/bit_util.h"
@@ -14,7 +15,6 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
-#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -162,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
+[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
+ const ImageInfo& info) {
if (info.type == ImageType::Buffer) {
return vk::Image{};
}
- return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
+ return allocator.CreateImage(MakeImageCreateInfo(device, info));
}
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
@@ -189,13 +190,16 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
if (info.IsRenderTarget()) {
return ImageAspectMask(info.format);
}
- const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
+ bool any_r =
+ std::ranges::any_of(info.Swizzle(), [](SwizzleSource s) { return s == SwizzleSource::R; });
switch (info.format) {
case PixelFormat::D24_UNORM_S8_UINT:
case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
+ // R = depth, G = stencil
+ return any_r ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
+ // R = stencil, G = depth
+ return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
case PixelFormat::D16_UNORM:
case PixelFormat::D32_FLOAT:
return VK_IMAGE_ASPECT_DEPTH_BIT;
@@ -326,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
- std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
- std::vector<VkBufferCopy> result(copies.size());
+[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
+TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
+ boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
std::ranges::transform(
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
@@ -340,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return result;
}
-[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
+[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
struct Maker {
VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -373,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
VkImageAspectFlags aspect_mask;
};
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- std::vector<VkBufferImageCopy> result(copies.size() * 2);
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
std::ranges::transform(copies, result.begin(),
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
std::ranges::transform(copies, result.begin() + copies.size(),
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
return result;
} else {
- std::vector<VkBufferImageCopy> result(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
return result;
}
@@ -794,13 +798,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
BlitImageHelper& blit_image_helper_,
RenderPassCache& render_pass_cache_,
DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue)
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue)
: device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
render_pass_cache{render_pass_cache_}, resolution{Settings::values.resolution_info} {
if (Settings::values.accelerate_astc) {
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
- update_descriptor_queue, memory_allocator);
+ compute_pass_descriptor_queue, memory_allocator);
}
}
@@ -835,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
- if (buffer_commits[level]) {
+ if (buffers[level]) {
return *buffers[level];
}
const auto new_size = Common::NextPow2(needed_size);
static constexpr VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
- buffers[level] = device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo temp_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -851,26 +855,35 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- buffer_commits[level] = std::make_unique<MemoryCommit>(
- memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
+ };
+ buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal);
return *buffers[level];
}
+void TextureCacheRuntime::BarrierFeedbackLoop() {
+ scheduler.RequestOutsideRenderPassOperationContext();
+}
+
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
- std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
const VkImageAspectFlags src_aspect_mask = src.AspectMask();
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
- std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) {
- return MakeBufferImageCopy(copy, true, src_aspect_mask);
- });
+ const auto bpp_in = BytesPerBlock(src.info.format) / DefaultBlockWidth(src.info.format);
+ const auto bpp_out = BytesPerBlock(dst.info.format) / DefaultBlockWidth(dst.info.format);
+ std::ranges::transform(copies, vk_in_copies.begin(),
+ [src_aspect_mask, bpp_in, bpp_out](const auto& copy) {
+ auto copy2 = copy;
+ copy2.src_offset.x = (bpp_out * copy.src_offset.x) / bpp_in;
+ copy2.extent.width = (bpp_out * copy.extent.width) / bpp_in;
+ return MakeBufferImageCopy(copy2, true, src_aspect_mask);
+ });
std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) {
return MakeBufferImageCopy(copy, false, dst_aspect_mask);
});
- const u32 img_bpp = BytesPerBlock(src.info.format);
+ const u32 img_bpp = BytesPerBlock(dst.info.format);
size_t total_size = 0;
for (const auto& copy : copies) {
total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp;
@@ -1143,7 +1156,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkImageCopy> vk_copies(copies.size());
+ boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask());
@@ -1230,6 +1243,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
});
}
+void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
+ std::span<const VideoCommon::ImageCopy> copies) {
+ UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan.");
+}
+
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
@@ -1247,15 +1265,18 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
- runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
- commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
+ runtime{&runtime_},
+ original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
- if (Settings::values.accelerate_astc.GetValue()) {
+ if (Settings::values.async_astc.GetValue()) {
+ flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
+ } else if (Settings::values.astc_recompression.GetValue() ==
+ Settings::AstcRecompression::Uncompressed &&
+ Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
- } else {
- flags |= VideoCommon::ImageFlagBits::Converted;
}
+ flags |= VideoCommon::ImageFlagBits::Converted;
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
if (runtime->device.HasDebuggingToolAttached()) {
@@ -1267,7 +1288,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
};
current_image = *original_image;
- if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
+ if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() &&
+ Settings::values.astc_recompression.GetValue() ==
+ Settings::AstcRecompression::Uncompressed) {
const auto& device = runtime->device.GetLogical();
storage_image_views.reserve(info.resources.levels);
for (s32 level = 0; level < info.resources.levels; ++level) {
@@ -1300,15 +1323,16 @@ Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBas
Image::~Image() = default;
-void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
// TODO: Move this to another API
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown(true);
}
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
- const VkBuffer src_buffer = map.buffer;
+ auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+ const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
@@ -1321,15 +1345,38 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
}
}
-void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+ UploadMemory(map.buffer, map.offset, copies);
+}
+
+void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ std::array buffer_handles{
+ buffer,
+ };
+ std::array buffer_offsets{
+ offset,
+ };
+ DownloadMemory(buffer_handles, buffer_offsets, copies);
+}
+
+void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
- std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ boost::container::small_vector<VkBuffer, 8> buffers_vector{};
+ boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
+ vk_copies;
+ for (size_t index = 0; index < buffers_span.size(); index++) {
+ buffers_vector.emplace_back(buffers_span[index]);
+ vk_copies.emplace_back(
+ TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
+ }
scheduler->RequestOutsideRenderPassOperationContext();
- scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask,
- vk_copies](vk::CommandBuffer cmdbuf) {
+ scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
+ aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -1348,6 +1395,20 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, read_barrier);
+
+ for (size_t index = 0; index < buffers.size(); index++) {
+ cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
+ vk_copies[index]);
+ }
+
+ const VkMemoryBarrier memory_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -1366,15 +1427,6 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
- const VkMemoryBarrier memory_write_barrier{
- .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
- };
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, read_barrier);
- cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
@@ -1383,6 +1435,16 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
}
}
+void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+ std::array buffers{
+ map.buffer,
+ };
+ std::array offsets{
+ map.offset,
+ };
+ DownloadMemory(buffers, offsets, copies);
+}
+
bool Image::IsRescaled() const noexcept {
return True(flags & ImageFlagBits::Rescaled);
}
@@ -1405,9 +1467,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
- scaled_image = MakeImage(runtime->device, scaled_info);
- auto& allocator = runtime->memory_allocator;
- scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
+ scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info);
ignore = false;
}
current_image = *scaled_image;
@@ -1530,8 +1590,9 @@ bool Image::NeedsScaleHelper() const {
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
- : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
- image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) {
+ : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
+ device{&runtime.device}, image_handle{image.Handle()},
+ samples(ConvertSampleCount(image.info.num_samples)) {
using Shader::TextureType;
const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -1577,7 +1638,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
vk::ImageView handle = device->GetLogical().CreateImageView(ci);
if (device->HasDebuggingToolAttached()) {
- handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
+ handle.SetObjectNameEXT(VideoCommon::Name(*this, gpu_addr).c_str());
}
image_views[static_cast<size_t>(tex_type)] = std::move(handle);
};
@@ -1618,7 +1679,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
- : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
@@ -1739,27 +1800,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
// Some games have samplers with garbage. Sanitize them here.
const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
- sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .pNext = pnext,
- .flags = 0,
- .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
- .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
- .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
- .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
- .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
- .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
- .mipLodBias = tsc.LodBias(),
- .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
- .maxAnisotropy = max_anisotropy,
- .compareEnable = tsc.depth_compare_enabled,
- .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
- .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
- .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
- .borderColor =
- arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
- .unnormalizedCoordinates = VK_FALSE,
- });
+ const auto create_sampler = [&](const f32 anisotropy) {
+ return device.GetLogical().CreateSampler(VkSamplerCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = pnext,
+ .flags = 0,
+ .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+ .mipLodBias = tsc.LodBias(),
+ .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
+ .maxAnisotropy = anisotropy,
+ .compareEnable = tsc.depth_compare_enabled,
+ .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+ .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
+ .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
+ .borderColor =
+ arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
+ .unnormalizedCoordinates = VK_FALSE,
+ });
+ };
+
+ sampler = create_sampler(max_anisotropy);
+
+ const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy);
+ if (max_anisotropy > max_anisotropy_default) {
+ sampler_default_anisotropy = create_sampler(max_anisotropy_default);
+ }
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
@@ -1786,7 +1856,7 @@ Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, bool is_rescaled) {
- std::vector<VkImageView> attachments;
+ boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
@@ -1809,6 +1879,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
num_layers = std::max(num_layers, color_buffer->range.extent.layers);
images[num_images] = color_buffer->ImageHandle();
image_ranges[num_images] = MakeSubresourceRange(color_buffer);
+ rt_map[index] = num_images;
samples = color_buffer->Samples();
++num_images;
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 1f27a3589..220943116 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -34,7 +34,6 @@ class ImageView;
class Framebuffer;
class RenderPassCache;
class StagingBufferPool;
-class UpdateDescriptorQueue;
class Scheduler;
class TextureCacheRuntime {
@@ -45,7 +44,7 @@ public:
BlitImageHelper& blit_image_helper_,
RenderPassCache& render_pass_cache_,
DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue);
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue);
void Finish();
@@ -70,6 +69,8 @@ public:
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+ void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
bool ShouldReinterpret(Image& dst, Image& src);
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
@@ -80,6 +81,11 @@ public:
return false;
}
+ bool CanUploadMSAA() const noexcept {
+ // TODO: Implement buffer to MSAA uploads
+ return false;
+ }
+
void AccelerateImageUpload(Image&, const StagingBufferRef&,
std::span<const VideoCommon::SwizzleParameters>);
@@ -97,6 +103,8 @@ public:
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
+ void BarrierFeedbackLoop();
+
const Device& device;
Scheduler& scheduler;
MemoryAllocator& memory_allocator;
@@ -106,9 +114,8 @@ public:
std::optional<ASTCDecoderPass> astc_decoder_pass;
const Settings::ResolutionScalingInfo& resolution;
- constexpr static size_t indexing_slots = 8 * sizeof(size_t);
+ static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
- std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
};
class Image : public VideoCommon::ImageBase {
@@ -125,9 +132,18 @@ public:
Image(Image&&) = default;
Image& operator=(Image&&) = default;
+ void UploadMemory(VkBuffer buffer, VkDeviceSize offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
+
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
+ void DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
+
+ void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
+ std::span<const VideoCommon::BufferImageCopy> copies);
+
void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
@@ -163,12 +179,10 @@ private:
TextureCacheRuntime* runtime{};
vk::Image original_image;
- MemoryCommit commit;
std::vector<vk::ImageView> storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
vk::Image scaled_image{};
- MemoryCommit scaled_commit{};
VkImage current_image{};
std::unique_ptr<Framebuffer> scale_framebuffer;
@@ -249,7 +263,6 @@ private:
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
- GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
};
@@ -263,8 +276,17 @@ public:
return *sampler;
}
+ [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept {
+ return *sampler_default_anisotropy;
+ }
+
+ [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
+ return static_cast<bool>(sampler_default_anisotropy);
+ }
+
private:
vk::Sampler sampler;
+ vk::Sampler sampler_default_anisotropy;
};
class Framebuffer {
@@ -320,7 +342,7 @@ public:
}
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
- return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+ return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
}
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
@@ -340,6 +362,7 @@ private:
u32 num_images = 0;
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
+ std::array<size_t, NUM_RT> rt_map{};
bool has_depth{};
bool has_stencil{};
};
@@ -358,6 +381,7 @@ struct TextureCacheParams {
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
using AsyncBuffer = Vulkan::StagingBufferRef;
+ using BufferType = VkBuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
index c42594149..460d8d59d 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -1,6 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+#include <adrenotools/driver.h>
+#endif
+
#include "common/literals.h"
#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -13,7 +17,10 @@ namespace Vulkan {
using namespace Common::Literals;
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
- : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
+#ifndef ANDROID
+ : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device}
+#endif
+{
{
std::scoped_lock lk{m_submission_lock};
m_submission_time = std::chrono::steady_clock::now();
@@ -30,10 +37,11 @@ void TurboMode::QueueSubmitted() {
}
void TurboMode::Run(std::stop_token stop_token) {
+#ifndef ANDROID
auto& dld = m_device.GetLogical();
// Allocate buffer. 2MiB should be sufficient.
- auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -42,13 +50,11 @@ void TurboMode::Run(std::stop_token stop_token) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
-
- // Commit some device local memory for the buffer.
- auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+ };
+ vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
// Create the descriptor pool to contain our descriptor.
- constexpr VkDescriptorPoolSize pool_size{
+ static constexpr VkDescriptorPoolSize pool_size{
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
};
@@ -63,7 +69,7 @@ void TurboMode::Run(std::stop_token stop_token) {
});
// Create the descriptor set layout from the pool.
- constexpr VkDescriptorSetLayoutBinding layout_binding{
+ static constexpr VkDescriptorSetLayoutBinding layout_binding{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
@@ -142,8 +148,14 @@ void TurboMode::Run(std::stop_token stop_token) {
// Create a single command buffer.
auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
+#endif
while (!stop_token.stop_requested()) {
+#ifdef ANDROID
+#ifdef ARCHITECTURE_arm64
+ adrenotools_set_turbo(true);
+#endif
+#else
// Reset the fence.
fence.Reset();
@@ -209,7 +221,7 @@ void TurboMode::Run(std::stop_token stop_token) {
// Wait for completion.
fence.Wait();
-
+#endif
// Wait for the next graphics queue submission if necessary.
std::unique_lock lk{m_submission_lock};
Common::CondvarWait(m_submission_cv, lk, stop_token, [this] {
@@ -217,6 +229,9 @@ void TurboMode::Run(std::stop_token stop_token) {
std::chrono::milliseconds{100};
});
}
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+ adrenotools_set_turbo(false);
+#endif
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h
index 99b5ac50b..9341c9867 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.h
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h
@@ -23,8 +23,10 @@ public:
private:
void Run(std::stop_token stop_token);
+#ifndef ANDROID
Device m_device;
MemoryAllocator m_allocator;
+#endif
std::mutex m_submission_lock;
std::condition_variable_any m_submission_cv;
std::chrono::time_point<std::chrono::steady_clock> m_submission_time{};
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 4d4a6753b..0630ebda5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -14,24 +14,29 @@ namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
+ payload_start = payload.data();
payload_cursor = payload.data();
}
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
void UpdateDescriptorQueue::TickFrame() {
- payload_cursor = payload.data();
+ if (++frame_index >= FRAMES_IN_FLIGHT) {
+ frame_index = 0;
+ }
+ payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
+ payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
- // This is the maximum number of entries a single draw call migth use.
+ // This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
- if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
+ if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
- payload_cursor = payload.data();
+ payload_cursor = payload_start;
}
upload_start = payload_cursor;
}
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 625bcc809..e77b576ec 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -29,6 +29,12 @@ struct DescriptorUpdateEntry {
};
class UpdateDescriptorQueue final {
+ // This should be plenty for the vast majority of cases. Most desktop platforms only
+ // provide up to 3 swapchain images.
+ static constexpr size_t FRAMES_IN_FLIGHT = 7;
+ static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000;
+ static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
+
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
~UpdateDescriptorQueue();
@@ -73,9 +79,15 @@ private:
const Device& device;
Scheduler& scheduler;
+ size_t frame_index{0};
DescriptorUpdateEntry* payload_cursor = nullptr;
+ DescriptorUpdateEntry* payload_start = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
- std::array<DescriptorUpdateEntry, 0x10000> payload;
+ std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
};
+// TODO: should these be separate classes instead?
+using GuestDescriptorQueue = UpdateDescriptorQueue;
+using ComputePassDescriptorQueue = UpdateDescriptorQueue;
+
} // namespace Vulkan