diff options
Diffstat (limited to 'src/video_core/renderer_vulkan')
48 files changed, 1985 insertions, 1006 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index dd00d3edf..cf2964a3f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -12,6 +12,8 @@ #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -69,10 +71,11 @@ constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CRE .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), }; -constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +template <VkShaderStageFlags stageFlags, size_t size> +inline constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = stageFlags, .offset = 0, - .size = sizeof(PushConstants), + .size = static_cast<u32>(size), }; constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -125,10 +128,8 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; -constexpr std::array DYNAMIC_STATES{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, -}; +constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS}; constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ .sType = 
VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, @@ -205,15 +206,15 @@ inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ }; constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( - const VkDescriptorSetLayout* set_layout) { + const VkDescriptorSetLayout* set_layout, vk::Span<VkPushConstantRange> push_constants) { return VkPipelineLayoutCreateInfo{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = 1, + .setLayoutCount = (set_layout != nullptr ? 1u : 0u), .pSetLayouts = set_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), }; } @@ -302,8 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); } -void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { +void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,6 +325,13 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { + BindBlitState(cmdbuf, dst_region); const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / static_cast<float>(src_size.width); const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / @@ -335,8 +342,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, 
VkPipelineLayout layout, const Regi static_cast<float>(src_region.start.y) / static_cast<float>(src_size.height)}, }; - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } @@ -408,13 +413,20 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, two_textures_descriptor_allocator{ descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, - one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(one_texture_set_layout.address()))), - two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + one_texture_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + two_textures_pipeline_layout( + device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + two_textures_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + clear_color_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), + clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)), + clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), 
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), @@ -553,6 +565,30 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, + const Region2D& dst_region) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::BlendPremult, + }; + const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key); + const VkPipelineLayout layout = *clear_color_pipeline_layout; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record( + [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + const std::array blend_color = { + (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f, + (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 
1.0f : 0.0f}; + cmdbuf.SetBlendConstants(blend_color.data()); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -728,6 +764,58 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip return *blit_depth_stencil_pipelines.back(); } +VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(clear_color_keys, key); + if (it != clear_color_keys.end()) { + return *clear_color_pipelines[std::distance(clear_color_keys.begin(), it)]; + } + clear_color_keys.push_back(key); + const std::array stages = MakeStages(*clear_color_vert, *clear_color_frag); + const VkPipelineColorBlendAttachmentState color_blend_attachment_state{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + const VkPipelineColorBlendStateCreateInfo color_blend_state_generic_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment_state, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + clear_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = 
VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &color_blend_state_generic_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *clear_color_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *clear_color_pipelines.back(); +} + void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth) { if (pipeline) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index be8a9a2f6..2976a7d91 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -61,6 +61,9 @@ public: void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, const Region2D& dst_region); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -72,6 +75,8 @@ private: [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key); + void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); void ConvertDepthToColorPipeline(vk::Pipeline& 
pipeline, VkRenderPass renderpass); @@ -97,9 +102,12 @@ private: DescriptorAllocator two_textures_descriptor_allocator; vk::PipelineLayout one_texture_pipeline_layout; vk::PipelineLayout two_textures_pipeline_layout; + vk::PipelineLayout clear_color_pipeline_layout; vk::ShaderModule full_screen_vert; vk::ShaderModule blit_color_to_color_frag; vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule clear_color_vert; + vk::ShaderModule clear_color_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; @@ -112,6 +120,8 @@ private: std::vector<vk::Pipeline> blit_color_pipelines; std::vector<BlitImagePipelineKey> blit_depth_stencil_keys; std::vector<vk::Pipeline> blit_depth_stencil_pipelines; + std::vector<BlitImagePipelineKey> clear_color_keys; + std::vector<vk::Pipeline> clear_color_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f8398b511..e7df32d84 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -271,7 +271,7 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8 - // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range. + // If we subtract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range. // Perfect for a hash. const u32 value = static_cast<u32>(op); return value - (value >= 0x200 ? 
0x200 : 1); @@ -322,8 +322,8 @@ Maxwell::StencilOp::Op FixedPipelineState::UnpackStencilOp(u32 packed) noexcept } u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept { - // FrontAndBack is 0x408, by substracting 0x406 in it we get 2. - // Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1. + // FrontAndBack is 0x408, by subtracting 0x406 in it we get 2. + // Individual cull faces are in 0x404 and 0x405, subtracting 0x404 we get 0 and 1. const u32 value = static_cast<u32>(cull); return value - (value == 0x408 ? 0x406 : 0x404); } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index ca52e2389..9a0b10568 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/surface.h" @@ -166,7 +167,7 @@ struct FormatTuple { {VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM - {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT + {VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT {VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM @@ -197,10 +198,13 @@ struct FormatTuple { {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM + {VK_FORMAT_ASTC_10x6_SRGB_BLOCK}, // ASTC_2D_10X6_SRGB {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB 
{VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x10_UNORM_BLOCK}, // ASTC_2D_12X10_UNORM + {VK_FORMAT_ASTC_12x10_SRGB_BLOCK}, // ASTC_2D_12X10_SRGB {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM @@ -234,19 +238,25 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with PixelFormat pixel_format) { ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples)); FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)]; - if (tuple.format == VK_FORMAT_UNDEFINED) { - UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format); - return FormatInfo{VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; - } - - // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively + // Transcode on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format); - if (is_srgb) { - tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; - } else { - tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; - tuple.usage |= Storage; + + switch (Settings::values.astc_recompression.GetValue()) { + case Settings::AstcRecompression::Uncompressed: + if (is_srgb) { + tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; + } else { + tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; + tuple.usage |= Storage; + } + break; + case Settings::AstcRecompression::Bc1: + tuple.format = is_srgb ? VK_FORMAT_BC1_RGBA_SRGB_BLOCK : VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + break; + case Settings::AstcRecompression::Bc3: + tuple.format = is_srgb ? 
VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK; + break; } } const bool attachable = (tuple.usage & Attachable) != 0; @@ -337,6 +347,14 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { + if (device.MustEmulateScaledFormats()) { + if (type == Maxwell::VertexAttribute::Type::SScaled) { + type = Maxwell::VertexAttribute::Type::SInt; + } else if (type == Maxwell::VertexAttribute::Type::UScaled) { + type = Maxwell::VertexAttribute::Type::UInt; + } + } + const VkFormat format{([&]() { switch (type) { case Maxwell::VertexAttribute::Type::UnusedEnumDoNotUseBecauseItWillGoAway: diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 28b893e25..71c783709 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -176,9 +176,9 @@ public: }; inline void PushImageDescriptors(TextureCache& texture_cache, - UpdateDescriptorQueue& update_descriptor_queue, + GuestDescriptorQueue& guest_descriptor_queue, const Shader::Info& info, RescalingPushConstant& rescaling, - const VkSampler*& samplers, + const VideoCommon::SamplerId*& samplers, const VideoCommon::ImageViewInOut*& views) { const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); @@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache, for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const VideoCommon::ImageViewId image_view_id{(views++)->id}; - const VkSampler sampler{*(samplers++)}; + const VideoCommon::SamplerId sampler_id{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView 
vk_image_view{image_view.Handle(desc.type)}; - update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + const Sampler& sampler{texture_cache.GetSampler(sampler_id)}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() + : sampler.Handle()}; + guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } } @@ -201,7 +206,7 @@ inline void PushImageDescriptors(TextureCache& texture_cache, texture_cache.MarkModification(image_view.image_id); } const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - update_descriptor_queue.AddImage(vk_image_view); + guest_descriptor_queue.AddImage(vk_image_view); rescaling.PushImage(texture_cache.IsRescaling(image_view)); } } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2a8d9e377..ddf28ca28 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -16,7 +16,7 @@ #include "common/settings.h" #include "common/telemetry.h" #include "core/core_timing.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -84,17 +84,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr<Core::Frontend::GraphicsContext> context_) try : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), - instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, + cpu_memory(cpu_memory_), gpu(gpu_), 
library(OpenLibrary(context.get())), + instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), - surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(), scheduler(device, state_tracker), + surface(CreateSurface(instance, render_window.GetWindowInfo())), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), - blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, - screen_info), + present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, + surface), + blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, + scheduler, screen_info), rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, state_tracker, scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { @@ -121,46 +123,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { return; } // Update screen info if the framebuffer size has changed. 
- if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) { - screen_info.width = framebuffer->width; - screen_info.height = framebuffer->height; - } + screen_info.width = framebuffer->width; + screen_info.height = framebuffer->height; + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; const bool use_accelerated = rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; RenderScreenshot(*framebuffer, use_accelerated); - bool has_been_recreated = false; - const auto recreate_swapchain = [&](u32 width, u32 height) { - if (!has_been_recreated) { - has_been_recreated = true; - scheduler.Finish(); - } - swapchain.Create(width, height, is_srgb); - }; - - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); - if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width || - swapchain.GetHeight() != layout.height) { - recreate_swapchain(layout.width, layout.height); - } - bool is_outdated; - do { - swapchain.AcquireNextImage(); - is_outdated = swapchain.IsOutDated(); - if (is_outdated) { - recreate_swapchain(layout.width, layout.height); - } - } while (is_outdated); - if (has_been_recreated) { - blit_screen.Recreate(); - } - const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); - const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); - scheduler.Flush(render_semaphore, present_semaphore); - scheduler.WaitWorker(); - swapchain.Present(render_semaphore); + Frame* frame = present_manager.GetRenderFrame(); + blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); + scheduler.Flush(*frame->render_ready); + present_manager.Present(frame); gpu.RendererFrameEndNotify(); rasterizer.TickFrame(); @@ -198,7 +173,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr return; } const 
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{ + vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, @@ -221,7 +196,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal); const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr }); const VkExtent2D render_area{.width = layout.width, .height = layout.height}; const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); - // Since we're not rendering to the screen, ignore the render semaphore. 
- void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated)); + blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated); const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); const VkBufferCreateInfo dst_buffer_info{ @@ -260,8 +233,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }; - const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info); - MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download); + const vk::Buffer dst_buffer = + memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([&](vk::CommandBuffer cmdbuf) { @@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr .pNext = nullptr, .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -335,8 +308,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr scheduler.Finish(); // Copy backing image data to the QImage screenshot buffer - const auto dst_memory_map = dst_buffer_memory.Map(); - std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size()); + dst_buffer.Invalidate(); + std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(), + dst_buffer.Mapped().size()); renderer_settings.screenshot_complete_callback(false); renderer_settings.screenshot_requested = false; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 
009e75e0d..b2e8cbd1b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -9,6 +9,7 @@ #include "common/dynamic_library.h" #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_present_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -53,6 +54,10 @@ public: return device.GetDriverName(); } + void NotifySurfaceChanged() override { + present_manager.NotifySurfaceChanged(); + } + private: void Report() const; @@ -62,7 +67,7 @@ private: Core::Memory::Memory& cpu_memory; Tegra::GPU& gpu; - Common::DynamicLibrary library; + std::shared_ptr<Common::DynamicLibrary> library; vk::InstanceDispatch dld; vk::Instance instance; @@ -76,6 +81,7 @@ private: StateTracker state_tracker; Scheduler scheduler; Swapchain swapchain; + PresentManager present_manager; BlitScreen blit_screen; RasterizerVulkan rasterizer; std::optional<TurboMode> turbo_mode; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2f0cc27e8..ad3b29f0e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -74,7 +74,7 @@ struct ScreenRectVertex { } }; -constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { +std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { // clang-format off return { 2.f / width, 0.f, 0.f, 0.f, 0.f, 2.f / height, 0.f, 0.f, @@ -122,10 +122,12 @@ struct BlitScreen::BufferData { BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, const Device& device_, MemoryAllocator& memory_allocator_, - Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_) + Swapchain& swapchain_, 
PresentManager& present_manager_, + Scheduler& scheduler_, const ScreenInfo& screen_info_) : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, - memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, - image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { + memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, + scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_}, + current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} { resource_ticks.resize(image_count); CreateStaticResources(); @@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin BlitScreen::~BlitScreen() = default; void BlitScreen::Recreate() { + present_manager.WaitPresent(); + scheduler.Finish(); + device.GetLogical().WaitIdle(); CreateDynamicResources(); } -VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, - const VkFramebuffer& host_framebuffer, - const Layout::FramebufferLayout layout, VkExtent2D render_area, - bool use_accelerated) { +void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, + const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, + VkExtent2D render_area, bool use_accelerated) { RefreshResources(framebuffer); // Finish any pending renderpass scheduler.RequestOutsideRenderPassOperationContext(); - if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) { - image_count = swapchain_images; - Recreate(); - } - - const std::size_t image_index = swapchain.GetImageIndex(); - scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); @@ -165,11 +162,11 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, SetUniformData(data, layout); SetVertexData(data, framebuffer, layout); - const std::span<u8> mapped_span = 
buffer_commit.Map(); + const std::span<u8> mapped_span = buffer.Mapped(); std::memcpy(mapped_span.data(), &data, sizeof(data)); if (!use_accelerated) { - const u64 image_offset = GetRawImageOffset(framebuffer, image_index); + const u64 image_offset = GetRawImageOffset(framebuffer); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); @@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .depth = 1, }, }; - scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { - const VkImage image = *raw_images[image_index]; + scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[index]; const VkImageMemoryBarrier base_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { - UpdateAADescriptorSet(image_index, source_image_view, false); + UpdateAADescriptorSet(source_image_view, false); const u32 up_scale = Settings::values.resolution_info.up_scale; const u32 down_shift = Settings::values.resolution_info.down_shift; VkExtent2D size{ .width = (up_scale * framebuffer.width) >> down_shift, .height = (up_scale * framebuffer.height) >> down_shift, }; - scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, index = image_index, size, + anti_alias_pass](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier base_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); 
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, - aa_descriptor_sets[image_index], {}); + aa_descriptor_sets[index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); @@ -369,81 +367,104 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, }; VkImageView fsr_image_view = fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); - UpdateDescriptorSet(image_index, fsr_image_view, true); + UpdateDescriptorSet(fsr_image_view, true); } else { const bool is_nn = Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; - UpdateDescriptorSet(image_index, source_image_view, is_nn); + UpdateDescriptorSet(source_image_view, is_nn); } - scheduler.Record( - [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { - const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; - const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; - const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; - const VkClearValue clear_color{ - .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, - }; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = *renderpass, - .framebuffer = host_framebuffer, - .renderArea = - { - .offset = {0, 0}, - .extent = size, - }, - .clearValueCount = 1, - .pClearValues = &clear_color, - }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast<float>(size.width), - .height = static_cast<float>(size.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - const VkRect2D scissor{ - .offset = {0, 0}, - .extent = size, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - auto graphics_pipeline = [this]() { - switch (Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - return 
*bilinear_pipeline; - case Settings::ScalingFilter::Bicubic: - return *bicubic_pipeline; - case Settings::ScalingFilter::Gaussian: - return *gaussian_pipeline; - case Settings::ScalingFilter::ScaleForce: - return *scaleforce_pipeline; - default: - return *bilinear_pipeline; - } - }(); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); - - cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_sets[image_index], {}); - cmdbuf.Draw(4, 1, 0, 0); - cmdbuf.EndRenderPass(); - }); - return *semaphores[image_index]; + scheduler.Record([this, host_framebuffer, index = image_index, + size = render_area](vk::CommandBuffer cmdbuf) { + const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; + const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; + const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; + const VkClearValue clear_color{ + .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + }; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = *renderpass, + .framebuffer = host_framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = size, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast<float>(size.width), + .height = static_cast<float>(size.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = size, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + auto graphics_pipeline = [this]() { + switch (Settings::values.scaling_filter.GetValue()) { + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: + return *bilinear_pipeline; + case 
Settings::ScalingFilter::Bicubic: + return *bicubic_pipeline; + case Settings::ScalingFilter::Gaussian: + return *gaussian_pipeline; + case Settings::ScalingFilter::ScaleForce: + return *scaleforce_pipeline; + default: + return *bilinear_pipeline; + } + }(); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[index], {}); + cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); } -VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { - const std::size_t image_index = swapchain.GetImageIndex(); - const VkExtent2D render_area = swapchain.GetSize(); +void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated, bool is_srgb) { + // Recreate dynamic resources if the image count or colorspace changed + if (const std::size_t swapchain_images = swapchain.GetImageCount(); + swapchain_images != image_count || current_srgb != is_srgb) { + current_srgb = is_srgb; +#ifdef ANDROID + // Android is already ordered the same as Switch. + image_view_format = current_srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; +#else + image_view_format = current_srgb ? 
VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; +#endif + image_count = swapchain_images; + Recreate(); + } + + // Recreate the presentation frame if the dimensions of the window changed const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); - return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated); + if (layout.width != frame->width || layout.height != frame->height || + is_srgb != frame->is_srgb) { + Recreate(); + present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb, + image_view_format, *renderpass); + } + + const VkExtent2D render_area{frame->width, frame->height}; + Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated); + if (++image_index >= image_count) { + image_index = 0; + } } vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { @@ -471,13 +492,11 @@ void BlitScreen::CreateStaticResources() { } void BlitScreen::CreateDynamicResources() { - CreateSemaphores(); CreateDescriptorPool(); CreateDescriptorSetLayout(); CreateDescriptorSets(); CreatePipelineLayout(); CreateRenderPass(); - CreateFramebuffers(); CreateGraphicsPipeline(); fsr.reset(); smaa.reset(); @@ -525,11 +544,6 @@ void BlitScreen::CreateShaders() { } } -void BlitScreen::CreateSemaphores() { - semaphores.resize(image_count); - std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); -} - void BlitScreen::CreateDescriptorPool() { const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ { @@ -571,10 +585,10 @@ void BlitScreen::CreateDescriptorPool() { } void BlitScreen::CreateRenderPass() { - renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); + renderpass = CreateRenderPassImpl(image_view_format); } -vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { +vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) { const VkAttachmentDescription color_attachment{ 
.flags = 0, .format = format, @@ -584,7 +598,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, }; const VkAttachmentReference color_attachment_ref{ @@ -1052,29 +1066,14 @@ void BlitScreen::CreateSampler() { nn_sampler = device.GetLogical().CreateSampler(ci_nn); } -void BlitScreen::CreateFramebuffers() { - const VkExtent2D size{swapchain.GetSize()}; - framebuffers.resize(image_count); - - for (std::size_t i = 0; i < image_count; ++i) { - const VkImageView image_view{swapchain.GetImageViewIndex(i)}; - framebuffers[i] = CreateFramebuffer(image_view, size, renderpass); - } -} - void BlitScreen::ReleaseRawImages() { for (const u64 tick : resource_ticks) { scheduler.Wait(tick); } raw_images.clear(); - raw_buffer_commits.clear(); - aa_image_view.reset(); aa_image.reset(); - aa_commit = MemoryCommit{}; - buffer.reset(); - buffer_commit = MemoryCommit{}; } void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { @@ -1090,25 +1089,23 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer .pQueueFamilyIndices = nullptr, }; - buffer = device.GetLogical().CreateBuffer(ci); - buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload); + buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); } void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); raw_image_views.resize(image_count); - raw_buffer_commits.resize(image_count); const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, u32 down_shift = 0) { u32 extra_usages = used_on_framebuffer ? 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return device.GetLogical().CreateImage(VkImageCreateInfo{ + return memory_allocator.CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, .imageType = VK_IMAGE_TYPE_2D, - .format = GetFormat(framebuffer), + .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer), .extent = { .width = (up_scale * framebuffer.width) >> down_shift, @@ -1126,17 +1123,14 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); }; - const auto create_commit = [&](vk::Image& image) { - return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - }; - const auto create_image_view = [&](vk::Image& image) { + const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = *image, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = GetFormat(framebuffer), + .format = used_on_framebuffer ? 
VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer), .components = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -1157,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { for (size_t i = 0; i < image_count; ++i) { raw_images[i] = create_image(); - raw_buffer_commits[i] = create_commit(raw_images[i]); raw_image_views[i] = create_image_view(raw_images[i]); } @@ -1165,8 +1158,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { const u32 up_scale = Settings::values.resolution_info.up_scale; const u32 down_shift = Settings::values.resolution_info.down_shift; aa_image = create_image(true, up_scale, down_shift); - aa_commit = create_commit(aa_image); - aa_image_view = create_image_view(aa_image); + aa_image_view = create_image_view(aa_image, true); VkExtent2D size{ .width = (up_scale * framebuffer.width) >> down_shift, .height = (up_scale * framebuffer.height) >> down_shift, @@ -1175,7 +1167,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); return; } - aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); + aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer)); aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ @@ -1319,8 +1311,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); } -void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, - bool nn) const { +void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const { const VkDescriptorImageInfo image_info{ .sampler = nn ? 
*nn_sampler : *sampler, .imageView = image_view, @@ -1356,8 +1347,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); } -void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, - bool nn) const { +void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const { const VkDescriptorBufferInfo buffer_info{ .buffer = *buffer, .offset = offsetof(BufferData, uniform), @@ -1480,8 +1470,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; } -u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, - std::size_t image_index) const { +u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const { constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); return first_image_offset + GetSizeInBytes(framebuffer) * image_index; } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ebe10b08b..8365b5668 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -5,6 +5,7 @@ #include <memory> +#include "core/frontend/framebuffer_layout.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -42,6 +43,9 @@ class RasterizerVulkan; class Scheduler; class SMAA; class Swapchain; +class PresentManager; + +struct Frame; struct ScreenInfo { VkImage image{}; @@ -55,18 +59,17 @@ class BlitScreen { public: explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, - Scheduler& scheduler, const ScreenInfo& screen_info); + PresentManager& present_manager, Scheduler& 
scheduler, + const ScreenInfo& screen_info); ~BlitScreen(); void Recreate(); - [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, - const VkFramebuffer& host_framebuffer, - const Layout::FramebufferLayout layout, VkExtent2D render_area, - bool use_accelerated); + void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, + const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated); - [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated, bool is_srgb); [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent); @@ -79,10 +82,9 @@ private: void CreateStaticResources(); void CreateShaders(); - void CreateSemaphores(); void CreateDescriptorPool(); void CreateRenderPass(); - vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); + vk::RenderPass CreateRenderPassImpl(VkFormat format); void CreateDescriptorSetLayout(); void CreateDescriptorSets(); void CreatePipelineLayout(); @@ -90,15 +92,14 @@ private: void CreateSampler(); void CreateDynamicResources(); - void CreateFramebuffers(); void RefreshResources(const Tegra::FramebufferConfig& framebuffer); void ReleaseRawImages(); void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; - void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; + void UpdateDescriptorSet(VkImageView image_view, bool nn) const; + void UpdateAADescriptorSet(VkImageView image_view, bool nn) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; void SetVertexData(BufferData& data, const 
Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout) const; @@ -107,16 +108,17 @@ private: void CreateFSR(); u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; - u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, - std::size_t image_index) const; + u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; const Device& device; MemoryAllocator& memory_allocator; Swapchain& swapchain; + PresentManager& present_manager; Scheduler& scheduler; std::size_t image_count; + std::size_t image_index{}; const ScreenInfo& screen_info; vk::ShaderModule vertex_shader; @@ -135,20 +137,16 @@ private: vk::Pipeline gaussian_pipeline; vk::Pipeline scaleforce_pipeline; vk::RenderPass renderpass; - std::vector<vk::Framebuffer> framebuffers; vk::DescriptorSets descriptor_sets; vk::Sampler nn_sampler; vk::Sampler sampler; vk::Buffer buffer; - MemoryCommit buffer_commit; std::vector<u64> resource_ticks; - std::vector<vk::Semaphore> semaphores; std::vector<vk::Image> raw_images; std::vector<vk::ImageView> raw_image_views; - std::vector<MemoryCommit> raw_buffer_commits; vk::DescriptorPool aa_descriptor_pool; vk::DescriptorSetLayout aa_descriptor_set_layout; @@ -159,11 +157,12 @@ private: vk::DescriptorSets aa_descriptor_sets; vk::Image aa_image; vk::ImageView aa_image_view; - MemoryCommit aa_commit; u32 raw_width = 0; u32 raw_height = 0; Service::android::PixelFormat pixel_format{}; + bool current_srgb; + VkFormat image_view_format; std::unique_ptr<FSR> fsr; std::unique_ptr<SMAA> smaa; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1cfb4c2ff..660f7c9ff 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,7 +7,6 @@ #include <span> #include <vector> -#include 
"video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -51,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) { } } -vk::Buffer CreateBuffer(const Device& device, u64 size) { +vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) { VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | @@ -61,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { if (device.IsExtTransformFeedbackSupported()) { flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } - return device.GetLogical().CreateBuffer({ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -70,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); + }; + return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); } } // Anonymous namespace @@ -80,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, - commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { + device{&runtime.device}, buffer{ + CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } @@ -139,7 +139,7 @@ public: const u32 
num_first_offset_copies = 4; const size_t bytes_per_index = BytesPerIndex(index_type); const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; - buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -148,14 +148,21 @@ public: .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); + }; + buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT("Quad LUT"); } - memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); - u8* staging_data = staging.mapped_span.data(); + const bool host_visible = buffer.IsHostVisible(); + const StagingBufferRef staging = [&] { + if (host_visible) { + return StagingBufferRef{}; + } + return staging_pool.Request(size_bytes, MemoryUsage::Upload); + }(); + + u8* staging_data = host_visible ? 
buffer.Mapped().data() : staging.mapped_span.data(); const size_t quad_size = bytes_per_index * 6; for (u32 first = 0; first < num_first_offset_copies; ++first) { @@ -165,29 +172,33 @@ public: } } - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, - dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { - const VkBufferCopy copy{ - .srcOffset = src_offset, - .dstOffset = 0, - .size = size_bytes, - }; - const VkBufferMemoryBarrier write_barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = dst_buffer, - .offset = 0, - .size = size_bytes, - }; - cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); - }); + if (!host_visible) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, + dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { + const VkBufferCopy copy{ + .srcOffset = src_offset, + .dstOffset = 0, + .size = size_bytes, + }; + const VkBufferMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = dst_buffer, + .offset = 0, + .size = size_bytes, + }; + cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); + }); + } else { + buffer.Flush(); + } } void BindBuffer(u32 first) { @@ -238,7 +249,7 @@ 
private: return indices; } - void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) { + void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override { switch (index_type) { case VK_INDEX_TYPE_UINT8_EXT: std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); @@ -278,7 +289,7 @@ private: return indices; } - void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) { + void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override { switch (index_type) { case VK_INDEX_TYPE_UINT8_EXT: std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); @@ -298,12 +309,18 @@ private: BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, StagingBufferPool& staging_pool_, - UpdateDescriptorQueue& update_descriptor_queue_, + GuestDescriptorQueue& guest_descriptor_queue_, + ComputePassDescriptorQueue& compute_pass_descriptor_queue, DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, - staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, - uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) { + staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, + quad_index_pass(device, scheduler, descriptor_pool, staging_pool, + compute_pass_descriptor_queue) { + if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers. 
+ uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool, + compute_pass_descriptor_queue); + } quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_, scheduler_, staging_pool_); quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_, @@ -314,8 +331,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { return staging_pool.Request(size, MemoryUsage::Upload); } -StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { - return staging_pool.Request(size, MemoryUsage::Download); +StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { + return staging_pool.Request(size, MemoryUsage::Download, deferred); +} + +void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { + staging_pool.FreeDeferred(ref); } u64 BufferCacheRuntime::GetDeviceLocalMemory() const { @@ -330,10 +351,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } -u32 BufferCacheRuntime::GetStorageBufferAlignment() const { - return static_cast<u32>(device.GetStorageBufferAlignment()); -} - void BufferCacheRuntime::Finish() { scheduler.Finish(); } @@ -356,7 +373,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; // Measuring a popular game, this number never exceeds the specified size once data is warmed up - boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); + boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { @@ -440,7 +457,9 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, 
IndexFormat topology == PrimitiveTopology::QuadStrip); } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { vk_index_type = VK_INDEX_TYPE_UINT16; - std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); + if (uint8_pass) { + std::tie(vk_buffer, vk_offset) = uint8_pass->Assemble(num_indices, buffer, offset); + } } if (vk_buffer == VK_NULL_HANDLE) { // Vulkan doesn't support null index buffers. Replace it with our own null buffer. @@ -494,6 +513,36 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { + boost::container::small_vector<VkBuffer, 32> buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); ++index) { + auto handle = bindings.buffers[index]->Handle(); + if (handle == VK_NULL_HANDLE) { + bindings.offsets[index] = 0; + bindings.sizes[index] = VK_WHOLE_SIZE; + if (!device.HasNullDescriptor()) { + ReserveNullBuffer(); + handle = *null_buffer; + } + } + buffer_handles.push_back(handle); + } + if (device.IsExtExtendedDynamicStateSupported()) { + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data()); + }); + } else { + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index, + buffer_handles.data(), bindings.offsets.data()); + }); + } +} + void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size) { if (!device.IsExtTransformFeedbackSupported()) { @@ -515,6 +564,23 @@ void 
BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, }); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) { + if (!device.IsExtTransformFeedbackSupported()) { + // Already logged in the rasterizer + return; + } + boost::container::small_vector<VkBuffer, 4> buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); ++index) { + buffer_handles.push_back(bindings.buffers[index]->Handle()); + } + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), + buffer_handles.data(), bindings.offsets.data(), + bindings.sizes.data()); + }); +} + void BufferCacheRuntime::ReserveNullBuffer() { if (null_buffer) { return; @@ -533,11 +599,10 @@ void BufferCacheRuntime::ReserveNullBuffer() { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; - null_buffer = device.GetLogical().CreateBuffer(create_info); + null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { null_buffer.SetObjectNameEXT("Null buffer"); } - null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 06539c733..95446c732 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -3,7 +3,8 @@ #pragma once -#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache_base.h" +#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/engines/maxwell_3d.h" 
#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -17,6 +18,7 @@ namespace Vulkan { class Device; class DescriptorPool; class Scheduler; +struct HostVertexBinding; class BufferCacheRuntime; @@ -46,7 +48,6 @@ private: const Device* device{}; vk::Buffer buffer; - MemoryCommit commit; std::vector<BufferView> views; }; @@ -62,7 +63,8 @@ class BufferCacheRuntime { public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, Scheduler& scheduler_, StagingBufferPool& staging_pool_, - UpdateDescriptorQueue& update_descriptor_queue_, + GuestDescriptorQueue& guest_descriptor_queue, + ComputePassDescriptorQueue& compute_pass_descriptor_queue, DescriptorPool& descriptor_pool); void Finish(); @@ -73,11 +75,11 @@ public: bool CanReportMemoryUsage() const; - u32 GetStorageBufferAlignment() const; - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); + + void FreeDeferredStagingBuffer(StagingBufferRef& ref); void PreCopyBarrier(); @@ -95,8 +97,12 @@ public: void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings); + void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings); + std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, [[maybe_unused]] u32 binding_index, u32 size) { const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); @@ -115,12 +121,12 @@ public: void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { - update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + 
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); } private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { - update_descriptor_queue.AddBuffer(buffer, offset, size); + guest_descriptor_queue.AddBuffer(buffer, offset, size); } void ReserveNullBuffer(); @@ -129,21 +135,22 @@ private: MemoryAllocator& memory_allocator; Scheduler& scheduler; StagingBufferPool& staging_pool; - UpdateDescriptorQueue& update_descriptor_queue; + GuestDescriptorQueue& guest_descriptor_queue; std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer; std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; vk::Buffer null_buffer; - MemoryCommit null_buffer_commit; - Uint8Pass uint8_pass; + std::unique_ptr<Uint8Pass> uint8_pass; QuadIndexedPass quad_index_pass; }; struct BufferCacheParams { using Runtime = Vulkan::BufferCacheRuntime; using Buffer = Vulkan::Buffer; + using Async_Buffer = Vulkan::StagingBufferRef; + using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; static constexpr bool IS_OPENGL = false; static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; @@ -152,6 +159,8 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp new file mode 100644 index 000000000..f9e271507 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/buffer_cache/buffer_cache.h" +#include 
"video_core/renderer_vulkan/vk_buffer_cache.h" + +namespace VideoCommon { +template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>; +} diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 2f09de1c1..d0dbf7ca5 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -22,8 +22,8 @@ CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& devic CommandPool::~CommandPool() = default; void CommandPool::Allocate(size_t begin, size_t end) { - // Command buffers are going to be commited, recorded, executed every single usage cycle. - // They are also going to be reseted when commited. + // Command buffers are going to be committed, recorded, executed every single usage cycle. + // They are also going to be reset when committed. Pool& pool = pools.emplace_back(); pool.handle = device.GetLogical().CreateCommandPool({ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1a316b6eb..3bc8553e1 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -200,12 +200,12 @@ ComputePass::~ComputePass() = default; Uint8Pass::Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, - UpdateDescriptorQueue& update_descriptor_queue_) + ComputePassDescriptorQueue& compute_pass_descriptor_queue_) : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, - update_descriptor_queue{update_descriptor_queue_} {} + compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} 
Uint8Pass::~Uint8Pass() = default; @@ -214,10 +214,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); + compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); + const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { @@ -242,12 +242,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, - UpdateDescriptorQueue& update_descriptor_queue_) + ComputePassDescriptorQueue& compute_pass_descriptor_queue_) : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, - update_descriptor_queue{update_descriptor_queue_} {} + compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} QuadIndexedPass::~QuadIndexedPass() = default; @@ -272,10 +272,10 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( const std::size_t staging_size = num_tri_vertices * sizeof(u32); const auto staging = 
staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); - update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); + compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); + const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift, @@ -304,13 +304,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, - UpdateDescriptorQueue& update_descriptor_queue_, + ComputePassDescriptorQueue& compute_pass_descriptor_queue_, MemoryAllocator& memory_allocator_) : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, - update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} + compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ + memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -358,11 +359,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const u32 num_dispatches_z = image.info.resources.layers; - update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(map.buffer, 
input_offset, - image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset, + image.guest_size_bytes - swizzle.buffer_offset); + compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); + const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index c4c8fa081..dd3927376 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -21,7 +22,6 @@ namespace Vulkan { class Device; class StagingBufferPool; class Scheduler; -class UpdateDescriptorQueue; class Image; struct StagingBufferRef; @@ -50,7 +50,7 @@ class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, - UpdateDescriptorQueue& update_descriptor_queue_); + ComputePassDescriptorQueue& compute_pass_descriptor_queue_); ~Uint8Pass(); /// Assemble uint8 indices into an uint16 index buffer @@ -61,7 +61,7 @@ public: private: Scheduler& scheduler; StagingBufferPool& staging_buffer_pool; - UpdateDescriptorQueue& update_descriptor_queue; + ComputePassDescriptorQueue& compute_pass_descriptor_queue; }; class 
QuadIndexedPass final : public ComputePass { @@ -69,7 +69,7 @@ public: explicit QuadIndexedPass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, - UpdateDescriptorQueue& update_descriptor_queue_); + ComputePassDescriptorQueue& compute_pass_descriptor_queue_); ~QuadIndexedPass(); std::pair<VkBuffer, VkDeviceSize> Assemble( @@ -79,7 +79,7 @@ public: private: Scheduler& scheduler; StagingBufferPool& staging_buffer_pool; - UpdateDescriptorQueue& update_descriptor_queue; + ComputePassDescriptorQueue& compute_pass_descriptor_queue; }; class ASTCDecoderPass final : public ComputePass { @@ -87,7 +87,7 @@ public: explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, - UpdateDescriptorQueue& update_descriptor_queue_, + ComputePassDescriptorQueue& compute_pass_descriptor_queue_, MemoryAllocator& memory_allocator_); ~ASTCDecoderPass(); @@ -97,7 +97,7 @@ public: private: Scheduler& scheduler; StagingBufferPool& staging_buffer_pool; - UpdateDescriptorQueue& update_descriptor_queue; + ComputePassDescriptorQueue& compute_pass_descriptor_queue; MemoryAllocator& memory_allocator; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 2a0f0dbf0..73e585c2b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -26,13 +26,13 @@ using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue_, + GuestDescriptorQueue& guest_descriptor_queue_, Common::ThreadWorker* thread_worker, PipelineStatistics* pipeline_statistics, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) - : 
device{device_}, pipeline_cache(pipeline_cache_), - update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, + pipeline_cache(pipeline_cache_), guest_descriptor_queue{guest_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { if (shader_notify) { shader_notify->MarkShaderBuilding(); @@ -99,7 +99,7 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache) { - update_descriptor_queue.Acquire(); + guest_descriptor_queue.Acquire(); buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); @@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, static constexpr size_t max_elements = 64; boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views; - boost::container::static_vector<VkSampler, max_elements> samplers; + boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers; const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; @@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers.push_back(sampler->Handle()); + VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { @@ -192,9 +192,9 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.BindHostComputeBuffers(); RescalingPushConstant rescaling; - const VkSampler* samplers_it{samplers.data()}; + const 
VideoCommon::SamplerId* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; - PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it, + PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it, views_it); if (!is_built.load(std::memory_order::relaxed)) { @@ -204,7 +204,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const void* const descriptor_data{guest_descriptor_queue.UpdateData()}; const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); scheduler.Record([this, descriptor_data, is_rescaling, rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 78d77027f..d1a1e2c46 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -30,7 +30,7 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue, + GuestDescriptorQueue& guest_descriptor_queue, Common::ThreadWorker* thread_worker, PipelineStatistics* pipeline_statistics, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, @@ -48,7 +48,7 @@ public: private: const Device& device; vk::PipelineCache& pipeline_cache; - UpdateDescriptorQueue& update_descriptor_queue; + GuestDescriptorQueue& guest_descriptor_queue; Shader::Info info; VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 
0214b103a..fad9e3832 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -5,6 +5,7 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_device.h" diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 7fe2afcd9..145359d4e 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -40,7 +40,16 @@ private: }; using Fence = std::shared_ptr<InnerFence>; -using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; +struct FenceManagerParams { + using FenceType = Fence; + using BufferCacheType = BufferCache; + using TextureCacheType = TextureCache; + using QueryCacheType = QueryCache; + + static constexpr bool HAS_ASYNC_CHECK = true; +}; + +using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>; class FenceManager final : public GenericFenceManager { public: diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 33daa8c1c..9bcdca2fb 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -1,12 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <cmath> -#include "common/bit_cast.h" #include "common/common_types.h" #include "common/div_ceil.h" #include "common/settings.h" +#include "video_core/fsr.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" #include 
"video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" @@ -17,146 +16,7 @@ #include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { -namespace { -// Reimplementations of the constant generating functions in ffx_fsr1.h -// GCC generated a lot of warnings when using the official header. -u32 AU1_AH1_AF1(f32 f) { - static constexpr u32 base[512]{ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, - 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, - 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, - 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, - 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, - 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, - 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, - 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - }; - static constexpr s8 shift[512]{ - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, - 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, - 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, - }; - const u32 u = Common::BitCast<u32>(f); - const u32 i = u >> 23; - return base[i] + ((u & 0x7fffff) >> shift[i]); -} - -u32 AU1_AH2_AF2(f32 a[2]) { - return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); -} - -void FsrEasuCon(u32 con0[4], 
u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, - f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, - f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { - con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); - con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); - con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); - con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); - con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); - con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); - con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); - con3[2] = con3[3] = 0; -} - -void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], - f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, - f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, - f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { - FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, - inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + - inputOffsetInPixelsX); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + - inputOffsetInPixelsY); -} - -void FsrRcasCon(u32* con, f32 sharpness) { - sharpness = std::exp2f(-sharpness); - f32 hSharp[2]{sharpness, 
sharpness}; - con[0] = Common::BitCast<u32>(sharpness); - con[1] = AU1_AH2_AF2(hSharp); - con[2] = 0; - con[3] = 0; -} -} // Anonymous namespace +using namespace FSR; FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, VkExtent2D output_size_) @@ -345,10 +205,9 @@ void FSR::CreateDescriptorSets() { void FSR::CreateImages() { images.resize(image_count * 2); image_views.resize(image_count * 2); - buffer_commits.resize(image_count * 2); for (size_t i = 0; i < image_count * 2; ++i) { - images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + images[i] = memory_allocator.CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -371,7 +230,6 @@ void FSR::CreateImages() { .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal); image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 5d872861f..8bb9fc23a 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -47,7 +47,6 @@ private: vk::Sampler sampler; std::vector<vk::Image> images; std::vector<vk::ImageView> image_views; - std::vector<MemoryCommit> buffer_commits; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f91bb5a1d..c1595642e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -236,13 +236,13 @@ GraphicsPipeline::GraphicsPipeline( Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify, const 
Device& device_, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + GuestDescriptorQueue& guest_descriptor_queue_, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, const std::array<const Shader::Info*, NUM_STAGES>& infos) : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, pipeline_cache(pipeline_cache_), scheduler{scheduler_}, - update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { + guest_descriptor_queue{guest_descriptor_queue_}, spv_modules{std::move(stages)} { if (shader_notify) { shader_notify->MarkShaderBuilding(); } @@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; - std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; + std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers; size_t sampler_index{}; size_t view_index{}; @@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[view_index++] = {handle.first}; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_index++] = sampler->Handle(); + VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + samplers[sampler_index++] = sampler; } } if constexpr (Spec::has_images) { @@ -449,15 +449,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - update_descriptor_queue.Acquire(); + guest_descriptor_queue.Acquire(); RescalingPushConstant rescaling; RenderAreaPushConstant render_area; - const 
VkSampler* samplers_it{samplers.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling, + PushImageDescriptors(texture_cache, guest_descriptor_queue, stage_infos[stage], rescaling, samplers_it, views_it); const auto& info{stage_infos[0]}; if (info.uses_render_area) { @@ -481,12 +481,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } + texture_cache.UpdateRenderTargets(false); + texture_cache.CheckFeedbackLoop(views); ConfigureDraw(rescaling, render_area); } void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, const RenderAreaPushConstant& render_area) { - texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); if (!is_built.load(std::memory_order::relaxed)) { @@ -499,7 +500,7 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, const bool is_rescaling{texture_cache.IsRescaling()}; const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; - const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const void* const descriptor_data{guest_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), is_rescaling, update_rescaling, uses_render_area = render_area.uses_render_area, @@ -548,31 +549,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; static_vector<VkVertexInputAttributeDescription, 32> 
vertex_attributes; - if (key.state.dynamic_vertex_input) { - const size_t num_vertex_arrays = std::min( - key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings())); - for (size_t index = 0; index < num_vertex_arrays; ++index) { - const u32 type = key.state.DynamicAttributeType(index); - if (!stage_infos[0].loads.Generic(index) || type == 0) { - continue; - } - vertex_attributes.push_back({ - .location = static_cast<u32>(index), - .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, - .offset = 0, - }); - } - if (!vertex_attributes.empty()) { - vertex_bindings.push_back({ - .binding = 0, - .stride = 4, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, - }); - } - } else { + if (!key.state.dynamic_vertex_input) { const size_t num_vertex_arrays = std::min( Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings())); for (size_t index = 0; index < num_vertex_arrays; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 67c657d0e..99e56e9ad 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -64,7 +64,6 @@ class RenderPassCache; class RescalingPushConstant; class RenderAreaPushConstant; class Scheduler; -class UpdateDescriptorQueue; class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; @@ -74,7 +73,7 @@ public: Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify, const Device& device, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, + GuestDescriptorQueue& guest_descriptor_queue, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics, RenderPassCache& 
render_pass_cache, const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, const std::array<const Shader::Info*, NUM_STAGES>& infos); @@ -133,7 +132,7 @@ private: BufferCache& buffer_cache; vk::PipelineCache& pipeline_cache; Scheduler& scheduler; - UpdateDescriptorQueue& update_descriptor_queue; + GuestDescriptorQueue& guest_descriptor_queue; void (*configure_func)(GraphicsPipeline*, bool){}; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 8aa07ef9d..6b288b994 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -3,6 +3,7 @@ #include <thread> +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -10,7 +11,19 @@ namespace Vulkan { -MasterSemaphore::MasterSemaphore(const Device& device) { +constexpr u64 FENCE_RESERVE_SIZE = 8; + +MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { + if (!device.HasTimelineSemaphore()) { + static constexpr VkFenceCreateInfo fence_ci{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; + free_queue.resize(FENCE_RESERVE_SIZE); + std::ranges::generate(free_queue, + [&] { return device.GetLogical().CreateFence(fence_ci); }); + wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); }); + return; + } + static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, .pNext = nullptr, @@ -42,4 +55,164 @@ MasterSemaphore::MasterSemaphore(const Device& device) { MasterSemaphore::~MasterSemaphore() = default; +void MasterSemaphore::Refresh() { + if (!semaphore) { + // If we don't support timeline semaphores, there's nothing to refresh + return; + } + + u64 this_tick{}; + u64 counter{}; + do { + this_tick = 
gpu_tick.load(std::memory_order_acquire); + counter = semaphore.GetCounter(); + if (counter < this_tick) { + return; + } + } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, + std::memory_order_relaxed)); +} + +void MasterSemaphore::Wait(u64 tick) { + if (!semaphore) { + // If we don't support timeline semaphores, wait for the value normally + std::unique_lock lk{free_mutex}; + free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; }); + return; + } + + // No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + + // Update the GPU tick and try again + Refresh(); + + if (IsFree(tick)) { + return; + } + + // If none of the above is hit, fallback to a regular wait + while (!semaphore.Wait(tick)) { + } + + Refresh(); +} + +VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick) { + if (semaphore) { + return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + } else { + return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + } +} + +static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, +}; + +VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, + VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick) { + const VkSemaphore timeline_semaphore = *semaphore; + + const u32 num_signal_semaphores = signal_semaphore ? 2 : 1; + const std::array signal_values{host_tick, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 
1 : 0; + const VkTimelineSemaphoreSubmitInfo timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreValueCount = 0, + .pWaitSemaphoreValues = nullptr, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = &wait_semaphore, + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + + return device.GetGraphicsQueue().Submit(submit_info); +} + +VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick) { + const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; + const u32 num_wait_semaphores = wait_semaphore ? 
1 : 0; + + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = &wait_semaphore, + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = &signal_semaphore, + }; + + auto fence = GetFreeFence(); + auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); + + if (result == VK_SUCCESS) { + std::scoped_lock lock{wait_mutex}; + wait_queue.emplace(host_tick, std::move(fence)); + wait_cv.notify_one(); + } + + return result; +} + +void MasterSemaphore::WaitThread(std::stop_token token) { + while (!token.stop_requested()) { + u64 host_tick; + vk::Fence fence; + { + std::unique_lock lock{wait_mutex}; + Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); }); + if (token.stop_requested()) { + return; + } + std::tie(host_tick, fence) = std::move(wait_queue.front()); + wait_queue.pop(); + } + + fence.Wait(); + fence.Reset(); + + { + std::scoped_lock lock{free_mutex}; + free_queue.push_front(std::move(fence)); + gpu_tick.store(host_tick); + } + free_cv.notify_one(); + } +} + +vk::Fence MasterSemaphore::GetFreeFence() { + std::scoped_lock lock{free_mutex}; + if (free_queue.empty()) { + static constexpr VkFenceCreateInfo fence_ci{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; + return device.GetLogical().CreateFence(fence_ci); + } + + auto fence = std::move(free_queue.back()); + free_queue.pop_back(); + return fence; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 689f02ea5..3f599d7bd 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -4,7 +4,11 @@ #pragma once #include <atomic> +#include 
<condition_variable> +#include <deque> +#include <mutex> #include <thread> +#include <queue> #include "common/common_types.h" #include "common/polyfill_thread.h" @@ -15,6 +19,8 @@ namespace Vulkan { class Device; class MasterSemaphore { + using Waitable = std::pair<u64, vk::Fence>; + public: explicit MasterSemaphore(const Device& device); ~MasterSemaphore(); @@ -29,11 +35,6 @@ public: return gpu_tick.load(std::memory_order_acquire); } - /// Returns the timeline semaphore handle. - [[nodiscard]] VkSemaphore Handle() const noexcept { - return *semaphore; - } - /// Returns true when a tick has been hit by the GPU. [[nodiscard]] bool IsFree(u64 tick) const noexcept { return KnownGpuTick() >= tick; @@ -45,41 +46,38 @@ public: } /// Refresh the known GPU tick - void Refresh() { - u64 this_tick{}; - u64 counter{}; - do { - this_tick = gpu_tick.load(std::memory_order_acquire); - counter = semaphore.GetCounter(); - if (counter < this_tick) { - return; - } - } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, - std::memory_order_relaxed)); - } + void Refresh(); /// Waits for a tick to be hit on the GPU - void Wait(u64 tick) { - // No need to wait if the GPU is ahead of the tick - if (IsFree(tick)) { - return; - } - // Update the GPU tick and try again - Refresh(); - if (IsFree(tick)) { - return; - } - // If none of the above is hit, fallback to a regular wait - while (!semaphore.Wait(tick)) { - } - Refresh(); - } + void Wait(u64 tick); + + /// Submits the device graphics queue, updating the tick as necessary + VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick); + +private: + VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick); + VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick); + + void WaitThread(std::stop_token 
token); + + vk::Fence GetFreeFence(); private: + const Device& device; ///< Device. vk::Semaphore semaphore; ///< Timeline semaphore. std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. std::atomic<u64> current_tick{1}; ///< Current logical tick. + std::mutex wait_mutex; + std::mutex free_mutex; + std::condition_variable free_cv; + std::condition_variable_any wait_cv; + std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread. + std::deque<vk::Fence> free_queue; ///< Holds available fences for submission. std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. + std::jthread wait_thread; ///< Helper thread that waits for submitted fences. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7e69b11d8..9f316113c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,14 +114,16 @@ Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribut return Shader::AttributeType::Disabled; case Maxwell::VertexAttribute::Type::SNorm: case Maxwell::VertexAttribute::Type::UNorm: - case Maxwell::VertexAttribute::Type::UScaled: - case Maxwell::VertexAttribute::Type::SScaled: case Maxwell::VertexAttribute::Type::Float: return Shader::AttributeType::Float; case Maxwell::VertexAttribute::Type::SInt: return Shader::AttributeType::SignedInt; case Maxwell::VertexAttribute::Type::UInt: return Shader::AttributeType::UnsignedInt; + case Maxwell::VertexAttribute::Type::UScaled: + return Shader::AttributeType::UnsignedScaled; + case Maxwell::VertexAttribute::Type::SScaled: + return Shader::AttributeType::SignedScaled; } return Shader::AttributeType::Float; } @@ -165,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled) { 
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; } @@ -212,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; break; @@ -277,23 +285,26 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, - UpdateDescriptorQueue& update_descriptor_queue_, + GuestDescriptorQueue& guest_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, - descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, + descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, - workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), + workers(device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY + ? 
1 + : (std::max(std::thread::hardware_concurrency(), 2U) - 1), + "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverId driver_id{device.GetDriverID()}; profile = Shader::Profile{ .supported_spirv = device.SupportedSpirvVersion(), .unified_descriptor_binding = true, - .support_descriptor_aliasing = true, + .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(), .support_int8 = device.IsInt8Supported(), .support_int16 = device.IsShaderInt16Supported(), .support_int64 = device.IsShaderInt64Supported(), @@ -324,11 +335,17 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_derivative_control = true, .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_native_ndc = device.IsExtDepthClipControlSupported(), + .support_scaled_attributes = !device.MustEmulateScaledFormats(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .lower_left_origin_mode = false, .need_declared_frag_colors = false, + .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || + driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || + driver_id == VK_DRIVER_ID_MESA_RADV || + driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, @@ -336,16 +353,18 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_signed_operations = false, .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, .ignore_nan_fp_comparisons = false, - }; + .has_broken_spirv_subgroup_mask_vector_extract_dynamic = + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; host_info = Shader::HostTranslateInfo{ + 
.support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), - .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), + .support_conditional_barrier = device.SupportsConditionalBarriers(), }; if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { @@ -639,7 +658,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique<GraphicsPipeline>( scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device, - descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, + descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key, std::move(modules), infos); } catch (const Shader::Exception& exception) { @@ -692,6 +711,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, PipelineStatistics* statistics, bool build_in_parallel) try { + if (device.HasBrokenCompute()) { + LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); + return nullptr; + } + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -711,7 +735,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( } Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool, - update_descriptor_queue, thread_worker, statistics, + guest_descriptor_queue, thread_worker, statistics, &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 5171912d7..e323ea0fd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -82,7 +82,6 @@ class PipelineStatistics; class RasterizerVulkan; class RenderPassCache; class Scheduler; -class UpdateDescriptorQueue; using VideoCommon::ShaderInfo; @@ -93,16 +92,16 @@ struct ShaderPools { inst.ReleaseContents(); } - Shader::ObjectPool<Shader::IR::Inst> inst; - Shader::ObjectPool<Shader::IR::Block> block; - Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; + Shader::ObjectPool<Shader::IR::Inst> inst{8192}; + Shader::ObjectPool<Shader::IR::Block> block{32}; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block{32}; }; class PipelineCache : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue, + GuestDescriptorQueue& guest_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); ~PipelineCache(); @@ -144,7 +143,7 @@ private: const Device& device; Scheduler& scheduler; DescriptorPool& descriptor_pool; - UpdateDescriptorQueue& update_descriptor_queue; + GuestDescriptorQueue& guest_descriptor_queue; RenderPassCache& render_pass_cache; BufferCache& buffer_cache; TextureCache& texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp 
b/src/video_core/renderer_vulkan/vk_present_manager.cpp new file mode 100644 index 000000000..d681bd22a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -0,0 +1,491 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/microprofile.h" +#include "common/settings.h" +#include "common/thread.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_present_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_surface.h" + +namespace Vulkan { + +MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192)); + +namespace { + +bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) { + const VkFormatProperties props{physical_device.GetFormatProperties(format)}; + return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT); +} + +[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() { + return VkImageSubresourceLayers{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }; +} + +[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width, + s32 swapchain_height) { + return VkImageBlit{ + .srcSubresource = MakeImageSubresourceLayers(), + .srcOffsets = + { + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = frame_width, + .y = frame_height, + .z = 1, + }, + }, + .dstSubresource = MakeImageSubresourceLayers(), + .dstOffsets = + { + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = swapchain_width, + .y = swapchain_height, + .z = 1, + }, + }, + }; +} + +[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, 
u32 swapchain_width, + u32 swapchain_height) { + return VkImageCopy{ + .srcSubresource = MakeImageSubresourceLayers(), + .srcOffset = + { + .x = 0, + .y = 0, + .z = 0, + }, + .dstSubresource = MakeImageSubresourceLayers(), + .dstOffset = + { + .x = 0, + .y = 0, + .z = 0, + }, + .extent = + { + .width = std::min(frame_width, swapchain_width), + .height = std::min(frame_height, swapchain_height), + .depth = 1, + }, + }; +} + +} // Anonymous namespace + +PresentManager::PresentManager(const vk::Instance& instance_, + Core::Frontend::EmuWindow& render_window_, const Device& device_, + MemoryAllocator& memory_allocator_, Scheduler& scheduler_, + Swapchain& swapchain_, vk::SurfaceKHR& surface_) + : instance{instance_}, render_window{render_window_}, device{device_}, + memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_}, + surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), + swapchain.GetImageViewFormat())}, + use_present_thread{Settings::values.async_presentation.GetValue()}, + image_count{swapchain.GetImageCount()}, last_render_surface{ + render_window_.GetWindowInfo().render_surface} { + + auto& dld = device.GetLogical(); + cmdpool = dld.CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); + auto cmdbuffers = cmdpool.Allocate(image_count); + + frames.resize(image_count); + for (u32 i = 0; i < frames.size(); i++) { + Frame& frame = frames[i]; + frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()}; + frame.render_ready = dld.CreateSemaphore({ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }); + frame.present_done = dld.CreateFence({ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }); + 
free_queue.push(&frame); + } + + if (use_present_thread) { + present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); }); + } +} + +PresentManager::~PresentManager() = default; + +Frame* PresentManager::GetRenderFrame() { + MICROPROFILE_SCOPE(Vulkan_WaitPresent); + + // Wait for free presentation frames + std::unique_lock lock{free_mutex}; + free_cv.wait(lock, [this] { return !free_queue.empty(); }); + + // Take the frame from the queue + Frame* frame = free_queue.front(); + free_queue.pop(); + + // Wait for the presentation to be finished so all frame resources are free + frame->present_done.Wait(); + frame->present_done.Reset(); + + return frame; +} + +void PresentManager::Present(Frame* frame) { + if (!use_present_thread) { + scheduler.WaitWorker(); + CopyToSwapchain(frame); + free_queue.push(frame); + return; + } + + scheduler.Record([this, frame](vk::CommandBuffer) { + std::unique_lock lock{queue_mutex}; + present_queue.push(frame); + frame_cv.notify_one(); + }); +} + +void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb, + VkFormat image_view_format, VkRenderPass rd) { + auto& dld = device.GetLogical(); + + frame->width = width; + frame->height = height; + frame->is_srgb = is_srgb; + + frame->image = memory_allocator.CreateImage({ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, + .imageType = VK_IMAGE_TYPE_2D, + .format = swapchain.GetImageFormat(), + .extent = + { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); + + frame->image_view = dld.CreateImageView({ + .sType = 
VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *frame->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_view_format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + + const VkImageView image_view{*frame->image_view}; + frame->framebuffer = dld.CreateFramebuffer({ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = rd, + .attachmentCount = 1, + .pAttachments = &image_view, + .width = width, + .height = height, + .layers = 1, + }); +} + +void PresentManager::WaitPresent() { + if (!use_present_thread) { + return; + } + + // Wait for the present queue to be empty + { + std::unique_lock queue_lock{queue_mutex}; + frame_cv.wait(queue_lock, [this] { return present_queue.empty(); }); + } + + // The above condition will be satisfied when the last frame is taken from the queue. + // To ensure that frame has been presented as well take hold of the swapchain + // mutex. + std::scoped_lock swapchain_lock{swapchain_mutex}; +} + +void PresentManager::PresentThread(std::stop_token token) { + Common::SetCurrentThreadName("VulkanPresent"); + while (!token.stop_requested()) { + std::unique_lock lock{queue_mutex}; + + // Wait for presentation frames + Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); }); + if (token.stop_requested()) { + return; + } + + // Take the frame and notify anyone waiting + Frame* frame = present_queue.front(); + present_queue.pop(); + frame_cv.notify_one(); + + // By exchanging the lock ownership we take the swapchain lock + // before the queue lock goes out of scope. 
This way the swapchain + // lock in WaitPresent is guaranteed to occur after here. + std::exchange(lock, std::unique_lock{swapchain_mutex}); + + CopyToSwapchain(frame); + + // Free the frame for reuse + std::scoped_lock fl{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); + } +} + +void PresentManager::NotifySurfaceChanged() { +#ifdef ANDROID + std::scoped_lock lock{recreate_surface_mutex}; + recreate_surface_cv.notify_one(); +#endif +} + +void PresentManager::CopyToSwapchain(Frame* frame) { + MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); + + const auto recreate_swapchain = [&] { + swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); + image_count = swapchain.GetImageCount(); + }; + +#ifdef ANDROID + std::unique_lock lock{recreate_surface_mutex}; + + const auto needs_recreation = [&] { + if (last_render_surface != render_window.GetWindowInfo().render_surface) { + return true; + } + if (swapchain.NeedsRecreation(frame->is_srgb)) { + return true; + } + return false; + }; + + recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), + [&]() { return !needs_recreation(); }); + + // If the frontend recreated the surface, recreate the renderer surface and swapchain. + if (last_render_surface != render_window.GetWindowInfo().render_surface) { + last_render_surface = render_window.GetWindowInfo().render_surface; + surface = CreateSurface(instance, render_window.GetWindowInfo()); + recreate_swapchain(); + } +#endif + + // If the size or colorspace of the incoming frames has changed, recreate the swapchain + // to account for that. 
+ const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb); + const bool size_changed = + swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; + if (srgb_changed || size_changed) { + recreate_swapchain(); + } + + while (swapchain.AcquireNextImage()) { + recreate_swapchain(); + } + + const vk::CommandBuffer cmdbuf{frame->cmdbuf}; + cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); + + const VkImage image{swapchain.CurrentImage()}; + const VkExtent2D extent = swapchain.GetExtent(); + const std::array pre_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = *frame->image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const std::array post_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask 
= VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = *frame->image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {}, + {}, {}, pre_barriers); + + if (blit_supported) { + cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageBlit(frame->width, frame->height, extent.width, extent.height), + VK_FILTER_LINEAR); + } else { + cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageCopy(frame->width, frame->height, extent.width, extent.height)); + } + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {}, + {}, {}, post_barriers); + + cmdbuf.End(); + + const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); + const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore(); + const std::array wait_semaphores = {present_semaphore, 
*frame->render_ready}; + + static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + }; + + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 2U, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = 1U, + .pSignalSemaphores = &render_semaphore, + }; + + // Submit the image copy/blit to the swapchain + { + std::scoped_lock submit_lock{scheduler.submit_mutex}; + switch (const VkResult result = + device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + break; + } + } + + // Present + swapchain.Present(render_semaphore); +} + +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h new file mode 100644 index 000000000..83e859416 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <condition_variable> +#include <mutex> +#include <queue> + +#include "common/common_types.h" +#include "common/polyfill_thread.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Core::Frontend { +class EmuWindow; +} // namespace Core::Frontend + +namespace Vulkan { + +class Device; +class Scheduler; +class Swapchain; + +struct Frame { + u32 width; + u32 height; + bool is_srgb; + vk::Image image; + vk::ImageView image_view; + vk::Framebuffer framebuffer; + vk::CommandBuffer cmdbuf; + vk::Semaphore render_ready; + vk::Fence present_done; +}; + +class PresentManager { +public: + PresentManager(const vk::Instance& instance, Core::Frontend::EmuWindow& render_window, + const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler, + Swapchain& swapchain, vk::SurfaceKHR& surface); + ~PresentManager(); + + /// Returns the last used presentation frame + Frame* GetRenderFrame(); + + /// Pushes a frame for presentation + void Present(Frame* frame); + + /// Recreates the present frame to match the provided parameters + void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb, + VkFormat image_view_format, VkRenderPass rd); + + /// Waits for the present thread to finish presenting all queued frames. 
+ void WaitPresent(); + + /// This is called to notify the rendering backend of a surface change + void NotifySurfaceChanged(); + +private: + void PresentThread(std::stop_token token); + + void CopyToSwapchain(Frame* frame); + +private: + const vk::Instance& instance; + Core::Frontend::EmuWindow& render_window; + const Device& device; + MemoryAllocator& memory_allocator; + Scheduler& scheduler; + Swapchain& swapchain; + vk::SurfaceKHR& surface; + vk::CommandPool cmdpool; + std::vector<Frame> frames; + std::queue<Frame*> present_queue; + std::queue<Frame*> free_queue; + std::condition_variable_any frame_cv; + std::condition_variable free_cv; + std::condition_variable recreate_surface_cv; + std::mutex swapchain_mutex; + std::mutex recreate_surface_mutex; + std::mutex queue_mutex; + std::mutex free_mutex; + std::jthread present_thread; + bool blit_supported; + bool use_present_thread; + std::size_t image_count{}; + void* last_render_surface{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 929c8ece6..d67490449 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { } } -QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, +QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, const Device& device_, Scheduler& scheduler_) - : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_}, + : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_}, query_pools{ QueryPool{device_, scheduler_, QueryType::SamplesPassed}, } {} @@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { const 
vk::Device* logical = &cache.GetDevice().GetLogical(); cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { + const bool use_precise = Settings::IsGPULevelHigh(); logical->ResetQueryPool(query.first, query.second, 1); - cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); + cmdbuf.BeginQuery(query.first, query.second, + use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); }); } @@ -112,8 +115,10 @@ void HostCounter::EndQuery() { [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); } -u64 HostCounter::BlockingQuery() const { - cache.GetScheduler().Wait(tick); +u64 HostCounter::BlockingQuery(bool async) const { + if (!async) { + cache.GetScheduler().Wait(tick); + } u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 26762ee09..c1b9552eb 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -52,7 +52,8 @@ private: class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, + explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, const Device& device_, Scheduler& scheduler_); ~QueryCache(); @@ -83,7 +84,7 @@ public: void EndQuery(); private: - u64 BlockingQuery() const override; + u64 BlockingQuery(bool async = false) const override; QueryCache& cache; const VideoCore::QueryType type; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 86ef0daeb..84e3a30cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -160,19 +160,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), - update_descriptor_queue(device, scheduler), - blit_image(device, scheduler, state_tracker, descriptor_pool), - render_pass_cache(device), texture_cache_runtime{device, scheduler, - memory_allocator, staging_pool, - blit_image, render_pass_cache, - descriptor_pool, update_descriptor_queue}, + guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), + blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device), + texture_cache_runtime{ + device, scheduler, memory_allocator, staging_pool, + blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue}, texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, - update_descriptor_queue, descriptor_pool), + guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), buffer_cache(*this, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, + pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache}, + query_cache{*this, cpu_memory_, device, scheduler}, + accelerate_dma(buffer_cache, texture_cache, scheduler), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -188,7 +188,14 @@ void 
RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { FlushWork(); gpu_memory->FlushCaching(); +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache.UpdateCounters(); + } +#else query_cache.UpdateCounters(); +#endif GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; if (!pipeline) { @@ -272,7 +279,14 @@ void RasterizerVulkan::DrawTexture() { SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache.UpdateCounters(); + } +#else query_cache.UpdateCounters(); +#endif texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -348,25 +362,12 @@ void RasterizerVulkan::Clear(u32 layer_count) { const u32 color_attachment = regs.clear_surface.RT; if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { - VkClearValue clear_value; - bool is_integer = false; - bool is_signed = false; - size_t int_size = 8; - for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) { - const auto& this_rt = regs.rt[i]; - if (this_rt.Address() == 0) { - continue; - } - if (this_rt.format == Tegra::RenderTargetFormat::NONE) { - continue; - } - const auto format = - VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format); - is_integer = IsPixelFormatInteger(format); - is_signed = IsPixelFormatSignedInteger(format); - int_size = PixelComponentSizeBitsInteger(format); - break; - } + const auto format = + VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); + bool is_integer = IsPixelFormatInteger(format); + bool is_signed = IsPixelFormatSignedInteger(format); + size_t int_size = PixelComponentSizeBitsInteger(format); + VkClearValue clear_value{}; if (!is_integer) { std::memcpy(clear_value.color.float32, regs.clear_color.data(), regs.clear_color.size() * 
sizeof(f32)); @@ -394,7 +395,15 @@ void RasterizerVulkan::Clear(u32 layer_count) { cmdbuf.ClearAttachments(attachment, clear_rect); }); } else { - UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel"); + u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 | + regs.clear_surface.B << 2 | regs.clear_surface.A << 3); + Region2D dst_region = { + Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, + Offset2D{.x = clear_rect.rect.offset.x + + static_cast<s32>(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + + static_cast<s32>(clear_rect.rect.extent.height)}}; + blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region); } } @@ -493,6 +502,22 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT return false; } +VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + auto area = texture_cache.GetFlushArea(addr, size); + if (area) { + return *area; + } + } + VideoCore::RasterizerDownloadArea new_area{ + .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), + .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), + .preemtive = true, + }; + return new_area; +} + void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; @@ -589,7 +614,7 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { } void RasterizerVulkan::SignalReference() { - fence_manager.SignalOrdering(); + fence_manager.SignalReference(); } void RasterizerVulkan::ReleaseFences() { @@ -622,7 +647,7 @@ void RasterizerVulkan::WaitForIdle() { cmdbuf.SetEvent(event, flags); cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); }); - SignalReference(); + fence_manager.SignalOrdering(); } void RasterizerVulkan::FragmentBarrier() { @@ -644,7 +669,8 @@ void 
RasterizerVulkan::FlushCommands() { void RasterizerVulkan::TickFrame() { draw_counter = 0; - update_descriptor_queue.TickFrame(); + guest_descriptor_queue.TickFrame(); + compute_pass_descriptor_queue.TickFrame(); fence_manager.TickFrame(); staging_pool.TickFrame(); { @@ -663,11 +689,12 @@ bool RasterizerVulkan::AccelerateConditionalRendering() { // TODO(Blinkhawk): Reimplement Host conditional rendering. return false; } - // Medium / Low Hack: stub any checks on queries writen into the buffer cache. + // Medium / Low Hack: stub any checks on queries written into the buffer cache. const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; Maxwell::ReportSemaphore::Compare cmp; if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), - VideoCommon::CacheType::BufferCache)) { + VideoCommon::CacheType::BufferCache | + VideoCommon::CacheType::QueryCache)) { return true; } return false; @@ -730,7 +757,11 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load } void RasterizerVulkan::FlushWork() { +#ifdef ANDROID + static constexpr u32 DRAWS_TO_DISPATCH = 1024; +#else static constexpr u32 DRAWS_TO_DISPATCH = 4096; +#endif // ANDROID // Only check multiples of 8 draws static_assert(DRAWS_TO_DISPATCH % 8 == 0); @@ -748,7 +779,9 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} +AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_, + Scheduler& scheduler_) + : buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, scheduler{scheduler_} {} bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { std::scoped_lock lock{buffer_cache.mutex}; @@ -760,6 +793,46 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } +template <bool IS_IMAGE_UPLOAD> +bool 
AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& buffer_operand, + const Tegra::DMA::ImageOperand& image_operand) { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + const auto image_id = texture_cache.DmaImageId(image_operand, IS_IMAGE_UPLOAD); + if (image_id == VideoCommon::NULL_IMAGE_ID) { + return false; + } + const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); + + const auto [image, copy] = texture_cache.DmaBufferImageCopy( + copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD); + const std::span copy_span{&copy, 1}; + + if constexpr (IS_IMAGE_UPLOAD) { + image->UploadMemory(buffer->Handle(), offset, copy_span); + } else { + texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, + buffer_operand.address, buffer_size); + } + return true; +} + +bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::ImageOperand& image_operand, + const Tegra::DMA::BufferOperand& buffer_operand) { + return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand); +} + +bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& buffer_operand, + const Tegra::DMA::ImageOperand& image_operand) { + return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand); +} + void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d->regs; UpdateViewportsState(regs); @@ -1056,7 +1129,7 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); enabled = 
false; } - scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { + scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index a0508b57c..b39710b3c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -45,14 +45,28 @@ class StateTracker; class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { public: - explicit AccelerateDMA(BufferCache& buffer_cache); + explicit AccelerateDMA(BufferCache& buffer_cache, TextureCache& texture_cache, + Scheduler& scheduler); bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; + bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, + const Tegra::DMA::BufferOperand& dst) override; + + bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst) override; + private: + template <bool IS_IMAGE_UPLOAD> + bool DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst); + BufferCache& buffer_cache; + TextureCache& texture_cache; + Scheduler& scheduler; }; class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, @@ -78,6 +92,7 @@ public: VideoCommon::CacheType which = VideoCommon::CacheType::All) override; bool MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void InnerInvalidation(std::span<const std::pair<VAddr, 
std::size_t>> sequences) override; @@ -169,7 +184,8 @@ private: StagingBufferPool staging_pool; DescriptorPool descriptor_pool; - UpdateDescriptorQueue update_descriptor_queue; + GuestDescriptorQueue guest_descriptor_queue; + ComputePassDescriptorQueue compute_pass_descriptor_queue; BlitImageHelper blit_image; RenderPassCache render_pass_cache; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index 6c8ac22f4..6572f82ba 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -37,7 +37,7 @@ size_t ResourcePool::CommitResource() { found = free_resource; } } - // Free iterator is hinted to the resource after the one that's been commited. + // Free iterator is hinted to the resource after the one that's been committed. hint_iterator = (*found + 1) % ticks.size(); return *found; } @@ -46,7 +46,7 @@ size_t ResourcePool::ManageOverflow() { const size_t old_capacity = ticks.size(); Grow(); - // The last entry is guaranted to be free, since it's the first element of the freshly + // The last entry is guaranteed to be free, since it's the first element of the freshly // allocated resources. return old_capacity; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index e03685af1..17ef61147 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -46,15 +46,17 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_) Scheduler::~Scheduler() = default; -void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { - SubmitExecution(signal_semaphore, wait_semaphore); +u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + // When flushing, we only send data to the worker thread; no waiting is necessary. 
+ const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore); AllocateNewContext(); + return signal_value; } void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + // When finishing, we need to wait for the submission to have executed on the device. const u64 presubmit_tick = CurrentTick(); SubmitExecution(signal_semaphore, wait_semaphore); - WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -63,8 +65,14 @@ void Scheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - std::unique_lock lock{work_mutex}; - wait_cv.wait(lock, [this] { return work_queue.empty(); }); + // Ensure the queue is drained. + { + std::unique_lock ql{queue_mutex}; + event_cv.wait(ql, [this] { return work_queue.empty(); }); + } + + // Now wait for execution to finish. + std::scoped_lock el{execution_mutex}; } void Scheduler::DispatchWork() { @@ -72,10 +80,10 @@ void Scheduler::DispatchWork() { return; } { - std::scoped_lock lock{work_mutex}; + std::scoped_lock ql{queue_mutex}; work_queue.push(std::move(chunk)); } - work_cv.notify_one(); + event_cv.notify_all(); AcquireNewChunk(); } @@ -137,30 +145,55 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) { void Scheduler::WorkerThread(std::stop_token stop_token) { Common::SetCurrentThreadName("VulkanWorker"); - do { + + const auto TryPopQueue{[this](auto& work) -> bool { + if (work_queue.empty()) { + return false; + } + + work = std::move(work_queue.front()); + work_queue.pop(); + event_cv.notify_all(); + return true; + }}; + + while (!stop_token.stop_requested()) { std::unique_ptr<CommandChunk> work; - bool has_submit{false}; + { - std::unique_lock lock{work_mutex}; - if (work_queue.empty()) { - wait_cv.notify_all(); - } - Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); }); + std::unique_lock lk{queue_mutex}; + + // Wait for work. 
+ Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); }); + + // If we've been asked to stop, we're done. if (stop_token.stop_requested()) { - continue; + return; } - work = std::move(work_queue.front()); - work_queue.pop(); - has_submit = work->HasSubmit(); + // Exchange lock ownership so that we take the execution lock before + // the queue lock goes out of scope. This allows us to force execution + // to complete in the next step. + std::exchange(lk, std::unique_lock{execution_mutex}); + + // Perform the work, tracking whether the chunk was a submission + // before executing. + const bool has_submit = work->HasSubmit(); work->ExecuteAll(current_cmdbuf); + + // If the chunk was a submission, reallocate the command buffer. + if (has_submit) { + AllocateWorkerCommandBuffer(); + } } - if (has_submit) { - AllocateWorkerCommandBuffer(); + + { + std::scoped_lock rl{reserve_mutex}; + + // Recycle the chunk back to the reserve. + chunk_reserve.emplace_back(std::move(work)); } - std::scoped_lock reserve_lock{reserve_mutex}; - chunk_reserve.push_back(std::move(work)); - } while (!stop_token.stop_requested()); + } } void Scheduler::AllocateWorkerCommandBuffer() { @@ -173,52 +206,21 @@ void Scheduler::AllocateWorkerCommandBuffer() { }); } -void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { +u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { EndPendingOperations(); InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - - const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; - - const u32 num_wait_semaphores = wait_semaphore ? 
2U : 1U; - const std::array wait_values{signal_value - 1, u64(1)}; - const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; - static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - }; - - const VkTimelineSemaphoreSubmitInfo timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pNext = nullptr, - .waitSemaphoreValueCount = num_wait_semaphores, - .pWaitSemaphoreValues = wait_values.data(), - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = num_wait_semaphores, - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1, - .pCommandBuffers = cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; if (on_submit) { on_submit(); } - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + std::scoped_lock lock{submit_mutex}; + switch (const VkResult result = master_semaphore->SubmitQueue( + cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { case VK_SUCCESS: break; case VK_ERROR_DEVICE_LOST: @@ -231,12 +233,20 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s }); chunk->MarkSubmit(); DispatchWork(); + return signal_value; } void Scheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. 
+ query_cache->UpdateCounters(); + } +#else query_cache->UpdateCounters(); +#endif } } @@ -247,7 +257,14 @@ void Scheduler::InvalidateState() { } void Scheduler::EndPendingOperations() { +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache->DisableStreams(); + } +#else query_cache->DisableStreams(); +#endif EndRenderPass(); } @@ -289,13 +306,16 @@ void Scheduler::EndRenderPass() { } void Scheduler::AcquireNewChunk() { - std::scoped_lock lock{reserve_mutex}; + std::scoped_lock rl{reserve_mutex}; + if (chunk_reserve.empty()) { + // If we don't have anything reserved, we need to make a new chunk. chunk = std::make_unique<CommandChunk>(); - return; + } else { + // Otherwise, we can just take from the reserve. + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } - chunk = std::move(chunk_reserve.back()); - chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bd4cb0f7e..475c682eb 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,7 +34,7 @@ public: ~Scheduler(); /// Sends the current execution context to the GPU. - void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); + u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. 
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); @@ -106,6 +106,8 @@ public: return *master_semaphore; } + std::mutex submit_mutex; + private: class Command { public: @@ -201,7 +203,7 @@ private: void AllocateWorkerCommandBuffer(); - void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); + u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); void AllocateNewContext(); @@ -232,10 +234,10 @@ private: std::queue<std::unique_ptr<CommandChunk>> work_queue; std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; + std::mutex execution_mutex; std::mutex reserve_mutex; - std::mutex work_mutex; - std::condition_variable_any work_cv; - std::condition_variable wait_cv; + std::mutex queue_mutex; + std::condition_variable_any event_cv; std::jthread worker_thread; }; diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 8eb735489..5efd7d66e 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp @@ -25,9 +25,7 @@ namespace { #define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) -std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, - MemoryAllocator& allocator, - VkExtent2D dimensions, VkFormat format) { +vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { const VkImageCreateInfo image_ci{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, @@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; - - auto image = device.GetLogical().CreateImage(image_ci); - auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal); - - return std::make_pair(std::move(image), std::move(commit)); + return allocator.CreateImage(image_ci); } void TransitionImageLayout(vk::CommandBuffer& 
cmdbuf, VkImage image, VkImageLayout target_layout, @@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, vk::Image& image, VkExtent2D dimensions, VkFormat format, std::span<const u8> initial_contents = {}) { - auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo upload_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload); - std::ranges::copy(initial_contents, upload_commit.Map().begin()); + }; + auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload); + std::ranges::copy(initial_contents, upload_buffer.Mapped().begin()); + upload_buffer.Flush(); const std::array<VkBufferImageCopy, 1> regions{{{ .bufferOffset = 0, @@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); }); scheduler.Finish(); - - // This should go out of scope before the commit - auto upload_buffer2 = std::move(upload_buffer); } vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { @@ -468,7 +460,7 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo> } void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { - constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ + static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, @@ -528,13 +520,11 @@ SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, } void 
SMAA::CreateImages() { - constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; - std::tie(m_static_images[Area], m_static_buffer_commits[Area]) = - CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); - std::tie(m_static_images[Search], m_static_buffer_commits[Search]) = - CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM); + m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); + m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); m_static_image_views[Area] = CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); @@ -544,12 +534,11 @@ void SMAA::CreateImages() { for (u32 i = 0; i < m_image_count; i++) { Images& images = m_dynamic_images.emplace_back(); - std::tie(images.images[Blend], images.buffer_commits[Blend]) = - CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - std::tie(images.images[Edges], images.buffer_commits[Edges]) = - CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); - std::tie(images.images[Output], images.buffer_commits[Output]) = - CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Blend] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); + images.images[Output] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); images.image_views[Blend] = CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); @@ -586,12 +575,12 @@ void SMAA::CreateSampler() { void SMAA::CreateShaders() { // 
These match the order of the SMAAStage enum - constexpr std::array vert_shader_sources{ + static constexpr std::array vert_shader_sources{ ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), }; - constexpr std::array frag_shader_sources{ + static constexpr std::array frag_shader_sources{ ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), @@ -675,8 +664,8 @@ void SMAA::UploadImages(Scheduler& scheduler) { return; } - constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h index 99a369148..0e214258a 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.h +++ b/src/video_core/renderer_vulkan/vk_smaa.h @@ -66,13 +66,11 @@ private: std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{}; std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{}; - std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits; std::array<vk::Image, MaxStaticImage> m_static_images{}; std::array<vk::ImageView, MaxStaticImage> m_static_image_views{}; struct Images { vk::DescriptorSets descriptor_sets{}; - std::array<MemoryCommit, MaxDynamicImage> buffer_commits; std::array<vk::Image, MaxDynamicImage> images{}; std::array<vk::ImageView, MaxDynamicImage> image_views{}; std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{}; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp 
b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 74ca77216..62b251a9b 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; -constexpr VkMemoryPropertyFlags HOST_FLAGS = - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; -constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; - -bool IsStreamHeap(VkMemoryHeap heap) noexcept { - return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; -} - -std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - VkMemoryPropertyFlags flags) noexcept { - for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { - if (((type_mask >> type_index) & 1) == 0) { - // Memory type is incompatible - continue; - } - const VkMemoryType& memory_type = props.memoryTypes[type_index]; - if ((memory_type.propertyFlags & flags) != flags) { - // Memory type doesn't have the flags we want - continue; - } - if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { - // Memory heap is not suitable for streaming - continue; - } - // Success! 
- return type_index; - } - return std::nullopt; -} - -u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - bool try_device_local) { - std::optional<u32> type; - if (try_device_local) { - // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this - type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); - if (type) { - return *type; - } - } - // Otherwise try without the DEVICE_LOCAL_BIT - type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); - if (type) { - return *type; - } - // This should never happen, and in case it does, signal it as an out of memory situation - throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); -} - size_t Region(size_t iterator) noexcept { return iterator / REGION_SIZE; } @@ -87,8 +38,7 @@ size_t Region(size_t iterator) noexcept { StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { - const vk::Device& dev = device.GetLogical(); - stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo stream_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -99,46 +49,13 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - if (device.HasDebuggingToolAttached()) { - stream_buffer.SetObjectNameEXT("Stream Buffer"); - } - VkMemoryDedicatedRequirements dedicated_reqs{ - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - .pNext = nullptr, - .prefersDedicatedAllocation = VK_FALSE, - .requiresDedicatedAllocation = VK_FALSE, - }; - const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); - const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || - 
dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; - const VkMemoryDedicatedAllocateInfo dedicated_info{ - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = nullptr, - .image = nullptr, - .buffer = *stream_buffer, }; - const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; - VkMemoryAllocateInfo stream_memory_info{ - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = make_dedicated ? &dedicated_info : nullptr, - .allocationSize = requirements.size, - .memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), - }; - stream_memory = dev.TryAllocateMemory(stream_memory_info); - if (!stream_memory) { - LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); - stream_memory_info.memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); - stream_memory = dev.AllocateMemory(stream_memory_info); - } - + stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream); if (device.HasDebuggingToolAttached()) { - stream_memory.SetObjectNameEXT("Stream Buffer Memory"); + stream_buffer.SetObjectNameEXT("Stream Buffer"); } - stream_buffer.BindMemory(*stream_memory, 0); - stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); + stream_pointer = stream_buffer.Mapped(); + ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!"); } StagingBufferPool::~StagingBufferPool() = default; @@ -199,7 +116,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { return StagingBufferRef{ .buffer = *stream_buffer, .offset = static_cast<VkDeviceSize>(offset), - .mapped_span = std::span<u8>(stream_pointer + offset, size), + .mapped_span = stream_pointer.subspan(offset, size), .usage{}, .log2_level{}, .index{}, @@ -247,7 +164,7 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t 
size, MemoryUsage usage, bool deferred) { const u32 log2 = Common::Log2Ceil64(size); - vk::Buffer buffer = device.GetLogical().CreateBuffer({ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -259,17 +176,15 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); + }; + vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage); if (device.HasDebuggingToolAttached()) { ++buffer_index; buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); } - MemoryCommit commit = memory_allocator.Commit(buffer, usage); - const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{}; - + const std::span<u8> mapped_span = buffer.Mapped(); StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{ .buffer = std::move(buffer), - .commit = std::move(commit), .mapped_span = mapped_span, .usage = usage, .log2_level = log2, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 4fd15f11a..5f69f08b1 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -46,7 +46,6 @@ private: struct StagingBuffer { vk::Buffer buffer; - MemoryCommit commit; std::span<u8> mapped_span; MemoryUsage usage; u32 log2_level; @@ -97,8 +96,7 @@ private: Scheduler& scheduler; vk::Buffer stream_buffer; - vk::DeviceMemory stream_memory; - u8* stream_pointer = nullptr; + std::span<u8> stream_pointer; size_t iterator = 0; size_t used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index b6810eef9..d3cddac69 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ 
b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -14,6 +14,7 @@ #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include "vulkan/vulkan_core.h" namespace Vulkan { @@ -33,23 +34,47 @@ VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) return found != formats.end() ? *found : formats[0]; } -VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { - // Mailbox (triple buffering) doesn't lock the application like fifo (vsync), - // prefer it if vsync option is not selected - const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); - if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless && - found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) { - return VK_PRESENT_MODE_MAILBOX_KHR; - } - if (!Settings::values.use_speed_limit.GetValue()) { - // FIFO present mode locks the framerate to the monitor's refresh rate, - // Find an alternative to surpass this limitation if FPS is unlocked. 
- const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR); - if (found_imm != modes.end()) { - return VK_PRESENT_MODE_IMMEDIATE_KHR; +static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox, + bool has_fifo_relaxed) { + // Mailbox doesn't lock the application like FIFO (vsync) + // FIFO present mode locks the framerate to the monitor's refresh rate + Settings::VSyncMode setting = [has_imm, has_mailbox]() { + // Choose Mailbox or Immediate if unlocked and those modes are supported + const auto mode = Settings::values.vsync_mode.GetValue(); + if (Settings::values.use_speed_limit.GetValue()) { + return mode; + } + switch (mode) { + case Settings::VSyncMode::FIFO: + case Settings::VSyncMode::FIFORelaxed: + if (has_mailbox) { + return Settings::VSyncMode::Mailbox; + } else if (has_imm) { + return Settings::VSyncMode::Immediate; + } + [[fallthrough]]; + default: + return mode; } + }(); + if ((setting == Settings::VSyncMode::Mailbox && !has_mailbox) || + (setting == Settings::VSyncMode::Immediate && !has_imm) || + (setting == Settings::VSyncMode::FIFORelaxed && !has_fifo_relaxed)) { + setting = Settings::VSyncMode::FIFO; + } + + switch (setting) { + case Settings::VSyncMode::Immediate: + return VK_PRESENT_MODE_IMMEDIATE_KHR; + case Settings::VSyncMode::Mailbox: + return VK_PRESENT_MODE_MAILBOX_KHR; + case Settings::VSyncMode::FIFO: + return VK_PRESENT_MODE_FIFO_KHR; + case Settings::VSyncMode::FIFORelaxed: + return VK_PRESENT_MODE_FIFO_RELAXED_KHR; + default: + return VK_PRESENT_MODE_FIFO_KHR; } - return VK_PRESENT_MODE_FIFO_KHR; } VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { @@ -65,21 +90,34 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi return extent; } +VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) { + if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) 
{ + return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } else if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) { + return VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; + } else { + LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}", + capabilities.supportedCompositeAlpha); + return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } +} + } // Anonymous namespace Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width_, u32 height_, bool srgb) : surface{surface_}, device{device_}, scheduler{scheduler_} { - Create(width_, height_, srgb); + Create(surface_, width_, height_, srgb); } Swapchain::~Swapchain() = default; -void Swapchain::Create(u32 width_, u32 height_, bool srgb) { +void Swapchain::Create(VkSurfaceKHR surface_, u32 width_, u32 height_, bool srgb) { is_outdated = false; is_suboptimal = false; width = width_; height = height_; + surface = surface_; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -87,18 +125,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) { return; } - device.GetLogical().WaitIdle(); Destroy(); CreateSwapchain(capabilities, srgb); CreateSemaphores(); - CreateImageViews(); resource_ticks.clear(); resource_ticks.resize(image_count); } -void Swapchain::AcquireNextImage() { +bool Swapchain::AcquireNextImage() { const VkResult result = device.GetLogical().AcquireNextImageKHR( *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], VK_NULL_HANDLE, &image_index); @@ -115,8 +151,11 @@ void Swapchain::AcquireNextImage() { LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); break; } + scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); + + return is_suboptimal || is_outdated; } void Swapchain::Present(VkSemaphore render_semaphore) { @@ -131,6 +170,7 @@ void Swapchain::Present(VkSemaphore 
render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; + std::scoped_lock lock{scheduler.submit_mutex}; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -153,13 +193,20 @@ void Swapchain::Present(VkSemaphore render_semaphore) { void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) { const auto physical_device{device.GetPhysical()}; const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; - const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; + const auto present_modes = physical_device.GetSurfacePresentModesKHR(surface); + has_mailbox = std::find(present_modes.begin(), present_modes.end(), + VK_PRESENT_MODE_MAILBOX_KHR) != present_modes.end(); + has_imm = std::find(present_modes.begin(), present_modes.end(), + VK_PRESENT_MODE_IMMEDIATE_KHR) != present_modes.end(); + has_fifo_relaxed = std::find(present_modes.begin(), present_modes.end(), + VK_PRESENT_MODE_FIFO_RELAXED_KHR) != present_modes.end(); - const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; - present_mode = ChooseSwapPresentMode(present_modes); + const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)}; + surface_format = ChooseSwapSurfaceFormat(formats); + present_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed); u32 requested_image_count{capabilities.minImageCount + 1}; - // Ensure Tripple buffering if possible. + // Ensure Triple buffering if possible. 
if (capabilities.maxImageCount > 0) { if (requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; @@ -180,12 +227,17 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo .imageColorSpace = surface_format.colorSpace, .imageExtent = {}, .imageArrayLayers = 1, - .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, +#ifdef ANDROID + // On Android, do not allow surface rotation to deviate from the frontend. + .preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, +#else .preTransform = capabilities.currentTransform, - .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, +#endif + .compositeAlpha = alpha_flags, .presentMode = present_mode, .clipped = VK_FALSE, .oldSwapchain = nullptr, @@ -217,67 +269,35 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo extent = swapchain_ci.imageExtent; current_srgb = srgb; - current_fps_unlocked = !Settings::values.use_speed_limit.GetValue(); images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); +#ifdef ANDROID + // Android is already ordered the same as Switch. + image_view_format = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; +#else image_view_format = srgb ? 
VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; +#endif } void Swapchain::CreateSemaphores() { present_semaphores.resize(image_count); std::ranges::generate(present_semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); -} - -void Swapchain::CreateImageViews() { - VkImageViewCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = {}, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image_view_format, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - - image_views.resize(image_count); - for (std::size_t i = 0; i < image_count; i++) { - ci.image = images[i]; - image_views[i] = device.GetLogical().CreateImageView(ci); - } + render_semaphores.resize(image_count); + std::ranges::generate(render_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void Swapchain::Destroy() { frame_index = 0; present_semaphores.clear(); - framebuffers.clear(); - image_views.clear(); swapchain.reset(); } -bool Swapchain::HasFpsUnlockChanged() const { - return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue(); -} - bool Swapchain::NeedsPresentModeUpdate() const { - // Mailbox present mode is the ideal for all scenarios. If it is not available, - // A different present mode is needed to support unlocked FPS above the monitor's refresh rate. 
- return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged(); + const auto requested_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed); + return present_mode != requested_mode; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index caf1ff32b..b8a1465a6 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -24,10 +24,10 @@ public: ~Swapchain(); /// Creates (or recreates) the swapchain with a given size. - void Create(u32 width, u32 height, bool srgb); + void Create(VkSurfaceKHR surface, u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - void AcquireNextImage(); + bool AcquireNextImage(); /// Presents the rendered image to the swapchain. void Present(VkSemaphore render_semaphore); @@ -52,6 +52,11 @@ public: return is_suboptimal; } + /// Returns true when the swapchain format is in the srgb color space + bool IsSrgb() const { + return current_srgb; + } + VkExtent2D GetSize() const { return extent; } @@ -64,22 +69,34 @@ public: return image_index; } + std::size_t GetFrameIndex() const { + return frame_index; + } + VkImage GetImageIndex(std::size_t index) const { return images[index]; } - VkImageView GetImageViewIndex(std::size_t index) const { - return *image_views[index]; + VkImage CurrentImage() const { + return images[image_index]; } VkFormat GetImageViewFormat() const { return image_view_format; } + VkFormat GetImageFormat() const { + return surface_format.format; + } + VkSemaphore CurrentPresentSemaphore() const { return *present_semaphores[frame_index]; } + VkSemaphore CurrentRenderSemaphore() const { + return *render_semaphores[frame_index]; + } + u32 GetWidth() const { return width; } @@ -88,6 +105,10 @@ public: return height; } + VkExtent2D GetExtent() const { + return extent; + } + private: void CreateSwapchain(const 
VkSurfaceCapabilitiesKHR& capabilities, bool srgb); void CreateSemaphores(); @@ -95,11 +116,9 @@ private: void Destroy(); - bool HasFpsUnlockChanged() const; - bool NeedsPresentModeUpdate() const; - const VkSurfaceKHR surface; + VkSurfaceKHR surface; const Device& device; Scheduler& scheduler; @@ -107,10 +126,9 @@ private: std::size_t image_count{}; std::vector<VkImage> images; - std::vector<vk::ImageView> image_views; - std::vector<vk::Framebuffer> framebuffers; std::vector<u64> resource_ticks; std::vector<vk::Semaphore> present_semaphores; + std::vector<vk::Semaphore> render_semaphores; u32 width; u32 height; @@ -121,9 +139,12 @@ private: VkFormat image_view_format{}; VkExtent2D extent{}; VkPresentModeKHR present_mode{}; + VkSurfaceFormatKHR surface_format{}; + bool has_imm{false}; + bool has_mailbox{false}; + bool has_fifo_relaxed{false}; bool current_srgb{}; - bool current_fps_unlocked{}; bool is_outdated{}; bool is_suboptimal{}; }; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d39372ec4..ce6acc30c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,10 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-3.0-or-later #include <algorithm> #include <array> #include <span> #include <vector> +#include <boost/container/small_vector.hpp> #include "common/bit_cast.h" #include "common/bit_util.h" @@ -14,7 +15,6 @@ #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -162,11 +162,12 
@@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { +[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, + const ImageInfo& info) { if (info.type == ImageType::Buffer) { return vk::Image{}; } - return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); + return allocator.CreateImage(MakeImageCreateInfo(device, info)); } [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { @@ -189,13 +190,16 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { if (info.IsRenderTarget()) { return ImageAspectMask(info.format); } - const bool is_first = info.Swizzle()[0] == SwizzleSource::R; + bool any_r = + std::ranges::any_of(info.Swizzle(), [](SwizzleSource s) { return s == SwizzleSource::R; }); switch (info.format) { case PixelFormat::D24_UNORM_S8_UINT: case PixelFormat::D32_FLOAT_S8_UINT: - return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; + // R = depth, G = stencil + return any_r ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; case PixelFormat::S8_UINT_D24_UNORM: - return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; + // R = stencil, G = depth + return any_r ? 
VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; case PixelFormat::D16_UNORM: case PixelFormat::D32_FLOAT: return VK_IMAGE_ASPECT_DEPTH_BIT; @@ -326,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( - std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { - std::vector<VkBufferCopy> result(copies.size()); +[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16> +TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { + boost::container::small_vector<VkBufferCopy, 16> result(copies.size()); std::ranges::transform( copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { return VkBufferCopy{ @@ -340,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return result; } -[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( +[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies( std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { struct Maker { VkBufferImageCopy operator()(const BufferImageCopy& copy) const { @@ -373,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { VkImageAspectFlags aspect_mask; }; if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - std::vector<VkBufferImageCopy> result(copies.size() * 2); + boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); std::ranges::transform(copies, result.begin() + copies.size(), Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); return result; } else { - std::vector<VkBufferImageCopy> result(copies.size()); + 
boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size()); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); return result; } @@ -794,13 +798,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched BlitImageHelper& blit_image_helper_, RenderPassCache& render_pass_cache_, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue) + ComputePassDescriptorQueue& compute_pass_descriptor_queue) : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, render_pass_cache{render_pass_cache_}, resolution{Settings::values.resolution_info} { if (Settings::values.accelerate_astc) { astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, - update_descriptor_queue, memory_allocator); + compute_pass_descriptor_queue, memory_allocator); } } @@ -835,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); - if (buffer_commits[level]) { + if (buffers[level]) { return *buffers[level]; } const auto new_size = Common::NextPow2(needed_size); static constexpr VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; - buffers[level] = device.GetLogical().CreateBuffer({ + const VkBufferCreateInfo temp_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -851,26 +855,35 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - buffer_commits[level] = std::make_unique<MemoryCommit>( - 
memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); + }; + buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal); return *buffers[level]; } +void TextureCacheRuntime::BarrierFeedbackLoop() { + scheduler.RequestOutsideRenderPassOperationContext(); +} + void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { - std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); - std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size()); const VkImageAspectFlags src_aspect_mask = src.AspectMask(); const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); - std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { - return MakeBufferImageCopy(copy, true, src_aspect_mask); - }); + const auto bpp_in = BytesPerBlock(src.info.format) / DefaultBlockWidth(src.info.format); + const auto bpp_out = BytesPerBlock(dst.info.format) / DefaultBlockWidth(dst.info.format); + std::ranges::transform(copies, vk_in_copies.begin(), + [src_aspect_mask, bpp_in, bpp_out](const auto& copy) { + auto copy2 = copy; + copy2.src_offset.x = (bpp_out * copy.src_offset.x) / bpp_in; + copy2.extent.width = (bpp_out * copy.extent.width) / bpp_in; + return MakeBufferImageCopy(copy2, true, src_aspect_mask); + }); std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { return MakeBufferImageCopy(copy, false, dst_aspect_mask); }); - const u32 img_bpp = BytesPerBlock(src.info.format); + const u32 img_bpp = BytesPerBlock(dst.info.format); size_t total_size = 0; for (const auto& copy : copies) { total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; @@ -1143,7 +1156,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, 
Im void TextureCacheRuntime::CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { - std::vector<VkImageCopy> vk_copies(copies.size()); + boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size()); const VkImageAspectFlags aspect_mask = dst.AspectMask(); ASSERT(aspect_mask == src.AspectMask()); @@ -1230,6 +1243,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } +void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, + std::span<const VideoCommon::ImageCopy> copies) { + UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan."); +} + u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } @@ -1247,15 +1265,18 @@ void TextureCacheRuntime::TickFrame() {} Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, - runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), - commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), + runtime{&runtime_}, + original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)), aspect_mask(ImageAspectMask(info.format)) { if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { - if (Settings::values.accelerate_astc.GetValue()) { + if (Settings::values.async_astc.GetValue()) { + flags |= VideoCommon::ImageFlagBits::AsynchronousDecode; + } else if (Settings::values.astc_recompression.GetValue() == + Settings::AstcRecompression::Uncompressed && + Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; - } else { - flags |= VideoCommon::ImageFlagBits::Converted; } + flags |= VideoCommon::ImageFlagBits::Converted; flags |= VideoCommon::ImageFlagBits::CostlyLoad; } if (runtime->device.HasDebuggingToolAttached()) { @@ 
-1267,7 +1288,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu .usage = VK_IMAGE_USAGE_STORAGE_BIT, }; current_image = *original_image; - if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { + if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() && + Settings::values.astc_recompression.GetValue() == + Settings::AstcRecompression::Uncompressed) { const auto& device = runtime->device.GetLogical(); storage_image_views.reserve(info.resources.levels); for (s32 level = 0; level < info.resources.levels; ++level) { @@ -1300,15 +1323,16 @@ Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBas Image::~Image() = default; -void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { +void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, + std::span<const VideoCommon::BufferImageCopy> copies) { // TODO: Move this to another API const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(true); } scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); - const VkBuffer src_buffer = map.buffer; + auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); + const VkBuffer src_buffer = buffer; const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; const bool is_initialized = std::exchange(initialized, true); @@ -1321,15 +1345,38 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag } } -void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { +void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { + UploadMemory(map.buffer, map.offset, copies); +} + +void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, + std::span<const 
VideoCommon::BufferImageCopy> copies) { + std::array buffer_handles{ + buffer, + }; + std::array buffer_offsets{ + offset, + }; + DownloadMemory(buffer_handles, buffer_offsets, copies); +} + +void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span, + std::span<const VideoCommon::BufferImageCopy> copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); } - std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); + boost::container::small_vector<VkBuffer, 8> buffers_vector{}; + boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> + vk_copies; + for (size_t index = 0; index < buffers_span.size(); index++) { + buffers_vector.emplace_back(buffers_span[index]); + vk_copies.emplace_back( + TransformBufferImageCopies(copies, offsets_span[index], aspect_mask)); + } scheduler->RequestOutsideRenderPassOperationContext(); - scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask, - vk_copies](vk::CommandBuffer cmdbuf) { + scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, + aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -1348,6 +1395,20 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + + for (size_t index = 0; index < buffers.size(); index++) { + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index], + vk_copies[index]); + } + + const VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = 
VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; const VkImageMemoryBarrier image_write_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -1366,15 +1427,6 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - const VkMemoryBarrier memory_write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier, nullptr, image_write_barrier); }); @@ -1383,6 +1435,16 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm } } +void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { + std::array buffers{ + map.buffer, + }; + std::array offsets{ + map.offset, + }; + DownloadMemory(buffers, offsets, copies); +} + bool Image::IsRescaled() const noexcept { return True(flags & ImageFlagBits::Rescaled); } @@ -1405,9 +1467,7 @@ bool Image::ScaleUp(bool ignore) { auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; - scaled_image = MakeImage(runtime->device, scaled_info); - auto& allocator = runtime->memory_allocator; - scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); + scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info); ignore = false; } current_image = *scaled_image; @@ -1530,8 +1590,9 @@ bool Image::NeedsScaleHelper() const { ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, 
ImageId image_id_, Image& image) - : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, - image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) { + : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr}, + device{&runtime.device}, image_handle{image.Handle()}, + samples(ConvertSampleCount(image.info.num_samples)) { using Shader::TextureType; const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); @@ -1577,7 +1638,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } vk::ImageView handle = device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this, gpu_addr).c_str()); } image_views[static_cast<size_t>(tex_type)] = std::move(handle); }; @@ -1618,7 +1679,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) - : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) @@ -1739,27 +1800,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t // Some games have samplers with garbage. Sanitize them here. 
const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); - sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = pnext, - .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), - .mipLodBias = tsc.LodBias(), - .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), - .maxAnisotropy = max_anisotropy, - .compareEnable = tsc.depth_compare_enabled, - .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), - .borderColor = - arbitrary_borders ? 
VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), - .unnormalizedCoordinates = VK_FALSE, - }); + const auto create_sampler = [&](const f32 anisotropy) { + return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = pnext, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.LodBias(), + .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = anisotropy, + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .borderColor = + arbitrary_borders ? 
VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); + }; + + sampler = create_sampler(max_anisotropy); + + const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy); + if (max_anisotropy > max_anisotropy_default) { + sampler_default_anisotropy = create_sampler(max_anisotropy_default); + } } Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, @@ -1786,7 +1856,7 @@ Framebuffer::~Framebuffer() = default; void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, bool is_rescaled) { - std::vector<VkImageView> attachments; + boost::container::small_vector<VkImageView, NUM_RT + 1> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1809,6 +1879,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, num_layers = std::max(num_layers, color_buffer->range.extent.layers); images[num_images] = color_buffer->ImageHandle(); image_ranges[num_images] = MakeSubresourceRange(color_buffer); + rt_map[index] = num_images; samples = color_buffer->Samples(); ++num_images; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 1f27a3589..220943116 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,5 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -34,7 +34,6 @@ class ImageView; class Framebuffer; class RenderPassCache; class StagingBufferPool; -class UpdateDescriptorQueue; class Scheduler; class TextureCacheRuntime { @@ -45,7 +44,7 @@ public: BlitImageHelper& blit_image_helper_, RenderPassCache& render_pass_cache_, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& 
update_descriptor_queue); + ComputePassDescriptorQueue& compute_pass_descriptor_queue); void Finish(); @@ -70,6 +69,8 @@ public: void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + bool ShouldReinterpret(Image& dst, Image& src); void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); @@ -80,6 +81,11 @@ public: return false; } + bool CanUploadMSAA() const noexcept { + // TODO: Implement buffer to MSAA uploads + return false; + } + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span<const VideoCommon::SwizzleParameters>); @@ -97,6 +103,8 @@ public: [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); + void BarrierFeedbackLoop(); + const Device& device; Scheduler& scheduler; MemoryAllocator& memory_allocator; @@ -106,9 +114,8 @@ public: std::optional<ASTCDecoderPass> astc_decoder_pass; const Settings::ResolutionScalingInfo& resolution; - constexpr static size_t indexing_slots = 8 * sizeof(size_t); + static constexpr size_t indexing_slots = 8 * sizeof(size_t); std::array<vk::Buffer, indexing_slots> buffers{}; - std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; }; class Image : public VideoCommon::ImageBase { @@ -125,9 +132,18 @@ public: Image(Image&&) = default; Image& operator=(Image&&) = default; + void UploadMemory(VkBuffer buffer, VkDeviceSize offset, + std::span<const VideoCommon::BufferImageCopy> copies); + void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, + std::span<const VideoCommon::BufferImageCopy> copies); + + void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets, + std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(const StagingBufferRef& map, std::span<const 
VideoCommon::BufferImageCopy> copies); @@ -163,12 +179,10 @@ private: TextureCacheRuntime* runtime{}; vk::Image original_image; - MemoryCommit commit; std::vector<vk::ImageView> storage_image_views; VkImageAspectFlags aspect_mask = 0; bool initialized = false; vk::Image scaled_image{}; - MemoryCommit scaled_commit{}; VkImage current_image{}; std::unique_ptr<Framebuffer> scale_framebuffer; @@ -249,7 +263,6 @@ private: VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; - GPUVAddr gpu_addr = 0; u32 buffer_size = 0; }; @@ -263,8 +276,17 @@ public: return *sampler; } + [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept { + return *sampler_default_anisotropy; + } + + [[nodiscard]] bool HasAddedAnisotropy() const noexcept { + return static_cast<bool>(sampler_default_anisotropy); + } + private: vk::Sampler sampler; + vk::Sampler sampler_default_anisotropy; }; class Framebuffer { @@ -320,7 +342,7 @@ public: } [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { - return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; } [[nodiscard]] bool HasAspectDepthBit() const noexcept { @@ -340,6 +362,7 @@ private: u32 num_images = 0; std::array<VkImage, 9> images{}; std::array<VkImageSubresourceRange, 9> image_ranges{}; + std::array<size_t, NUM_RT> rt_map{}; bool has_depth{}; bool has_stencil{}; }; @@ -358,6 +381,7 @@ struct TextureCacheParams { using Sampler = Vulkan::Sampler; using Framebuffer = Vulkan::Framebuffer; using AsyncBuffer = Vulkan::StagingBufferRef; + using BufferType = VkBuffer; }; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index c42594149..460d8d59d 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp 
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +#include <adrenotools/driver.h> +#endif + #include "common/literals.h" #include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -13,7 +17,10 @@ namespace Vulkan { using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) - : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { +#ifndef ANDROID + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device} +#endif +{ { std::scoped_lock lk{m_submission_lock}; m_submission_time = std::chrono::steady_clock::now(); @@ -30,10 +37,11 @@ void TurboMode::QueueSubmitted() { } void TurboMode::Run(std::stop_token stop_token) { +#ifndef ANDROID auto& dld = m_device.GetLogical(); // Allocate buffer. 2MiB should be sufficient. - auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -42,13 +50,11 @@ void TurboMode::Run(std::stop_token stop_token) { .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); - - // Commit some device local memory for the buffer. - auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + }; + vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); // Create the descriptor pool to contain our descriptor. - constexpr VkDescriptorPoolSize pool_size{ + static constexpr VkDescriptorPoolSize pool_size{ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, }; @@ -63,7 +69,7 @@ void TurboMode::Run(std::stop_token stop_token) { }); // Create the descriptor set layout from the pool. 
- constexpr VkDescriptorSetLayoutBinding layout_binding{ + static constexpr VkDescriptorSetLayoutBinding layout_binding{ .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, @@ -142,8 +148,14 @@ void TurboMode::Run(std::stop_token stop_token) { // Create a single command buffer. auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; +#endif while (!stop_token.stop_requested()) { +#ifdef ANDROID +#ifdef ARCHITECTURE_arm64 + adrenotools_set_turbo(true); +#endif +#else // Reset the fence. fence.Reset(); @@ -209,7 +221,7 @@ void TurboMode::Run(std::stop_token stop_token) { // Wait for completion. fence.Wait(); - +#endif // Wait for the next graphics queue submission if necessary. std::unique_lock lk{m_submission_lock}; Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { @@ -217,6 +229,9 @@ void TurboMode::Run(std::stop_token stop_token) { std::chrono::milliseconds{100}; }); } +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) + adrenotools_set_turbo(false); +#endif } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h index 99b5ac50b..9341c9867 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.h +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -23,8 +23,10 @@ public: private: void Run(std::stop_token stop_token); +#ifndef ANDROID Device m_device; MemoryAllocator m_allocator; +#endif std::mutex m_submission_lock; std::condition_variable_any m_submission_cv; std::chrono::time_point<std::chrono::steady_clock> m_submission_time{}; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 4d4a6753b..0630ebda5 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -14,24 +14,29 @@ namespace 
Vulkan { UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) : device{device_}, scheduler{scheduler_} { + payload_start = payload.data(); payload_cursor = payload.data(); } UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; void UpdateDescriptorQueue::TickFrame() { - payload_cursor = payload.data(); + if (++frame_index >= FRAMES_IN_FLIGHT) { + frame_index = 0; + } + payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE; + payload_cursor = payload_start; } void UpdateDescriptorQueue::Acquire() { // Minimum number of entries required. - // This is the maximum number of entries a single draw call migth use. + // This is the maximum number of entries a single draw call might use. static constexpr size_t MIN_ENTRIES = 0x400; - if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { + if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); - payload_cursor = payload.data(); + payload_cursor = payload_start; } upload_start = payload_cursor; } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 625bcc809..e77b576ec 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -29,6 +29,12 @@ struct DescriptorUpdateEntry { }; class UpdateDescriptorQueue final { + // This should be plenty for the vast majority of cases. Most desktop platforms only + // provide up to 3 swapchain images. 
+ static constexpr size_t FRAMES_IN_FLIGHT = 7; + static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000; + static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT; + public: explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); ~UpdateDescriptorQueue(); @@ -73,9 +79,15 @@ private: const Device& device; Scheduler& scheduler; + size_t frame_index{0}; DescriptorUpdateEntry* payload_cursor = nullptr; + DescriptorUpdateEntry* payload_start = nullptr; const DescriptorUpdateEntry* upload_start = nullptr; - std::array<DescriptorUpdateEntry, 0x10000> payload; + std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload; }; +// TODO: should these be separate classes instead? +using GuestDescriptorQueue = UpdateDescriptorQueue; +using ComputePassDescriptorQueue = UpdateDescriptorQueue; + } // namespace Vulkan |