diff options
author | liamwhite <liamwhite@users.noreply.github.com> | 2023-01-05 03:20:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-05 03:20:00 +0100 |
commit | b78328f19a54964ef6874281d1a4d6b6ad1c34d9 (patch) | |
tree | 239947ad53a4a06c3641d58c5d6b8daf5b3dc16a /src/video_core/renderer_vulkan | |
parent | Merge pull request #9518 from gidoly/revert-9504-pg2 (diff) | |
parent | yuzu-ui: Add setting for disabling macro HLE (diff) | |
download | yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.tar yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.tar.gz yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.tar.bz2 yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.tar.lz yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.tar.xz yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.tar.zst yuzu-b78328f19a54964ef6874281d1a4d6b6ad1c34d9.zip |
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 119 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.h | 76 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 63 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 29 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 421 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 24 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 56 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 25 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_state_tracker.cpp | 102 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_state_tracker.h | 119 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 8 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 6 |
14 files changed, 843 insertions, 210 deletions
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index e62b36822..3d328a250 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -48,43 +48,30 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& } } // Anonymous namespace -void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { +void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features) { const Maxwell& regs = maxwell3d.regs; const auto topology_ = maxwell3d.draw_manager->GetDrawState().topology; - const std::array enabled_lut{ - regs.polygon_offset_point_enable, - regs.polygon_offset_line_enable, - regs.polygon_offset_fill_enable, - }; - const u32 topology_index = static_cast<u32>(topology_); raw1 = 0; - extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); - dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); + extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0); + extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0); + extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0); + extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0); + extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0); + dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0); xfb_enabled.Assign(regs.transform_feedback_enabled != 0); - primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); - depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); - depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip == - Maxwell::ViewportClipControl::GeometryClip::Passthrough || - regs.viewport_clip_control.geometry_clip == - Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ || - regs.viewport_clip_control.geometry_clip == - Maxwell::ViewportClipControl::GeometryClip::FrustumZ); ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); - patch_control_points_minus_one.Assign(regs.patch_vertices - 1); tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value())); tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value())); tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() == Maxwell::Tessellation::OutputPrimitives::Triangles_CW); - logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); - logic_op.Assign(PackLogicOp(regs.logic_op.op)); + patch_control_points_minus_one.Assign(regs.patch_vertices - 1); topology.Assign(topology_); msaa_mode.Assign(regs.anti_alias_samples_mode); raw2 = 0; - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); + const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always_GL; alpha_test_func.Assign(PackComparisonOp(test_func)); @@ -97,6 +84,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); + app_stage.Assign(maxwell3d.engine_state); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast<u8>(regs.rt[i].format); @@ -105,7 +93,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, point_size = Common::BitCast<u32>(regs.point_size); if (maxwell3d.dirty.flags[Dirty::VertexInput]) { - if (has_dynamic_vertex_input) { + if (features.has_dynamic_vertex_input) { // Dirty flag will be reset by the command buffer update static constexpr std::array LUT{ 0u, // Invalid @@ -144,12 +132,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, } } } - if (maxwell3d.dirty.flags[Dirty::Blending]) { - maxwell3d.dirty.flags[Dirty::Blending] = false; - for (size_t index = 0; index < attachments.size(); ++index) { - attachments[index].Refresh(regs, index); - } - } if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) { maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false; const auto& transform = regs.viewport_transform; @@ -157,8 +139,27 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast<u16>(viewport.swizzle.raw); }); } + dynamic_state.raw1 = 0; + dynamic_state.raw2 = 0; if (!extended_dynamic_state) { dynamic_state.Refresh(regs); + std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) { + return static_cast<u16>(array.stride.Value()); + }); + } + if (!extended_dynamic_state_2_extra) { + dynamic_state.Refresh2(regs, topology, extended_dynamic_state_2); + } + if (!extended_dynamic_state_3_blend) { + if (maxwell3d.dirty.flags[Dirty::Blending]) { + maxwell3d.dirty.flags[Dirty::Blending] = false; + for (size_t index = 0; index < attachments.size(); ++index) { + attachments[index].Refresh(regs, index); + } + } + } + if (!extended_dynamic_state_3_enables) { + dynamic_state.Refresh3(regs); } if (xfb_enabled) { RefreshXfbState(xfb_state, regs); @@ -175,12 +176,11 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t mask_a.Assign(mask.A); // TODO: C++20 Use templated lambda to deduplicate code + if (!regs.blend.enable[index]) { + return; + } - if (!regs.blend_per_target_enabled) { - if (!regs.blend.enable[index]) { - return; - } - const auto& src = regs.blend; + const auto setup_blend = [&]<typename T>(const T& src) { equation_rgb.Assign(PackBlendEquation(src.color_op)); equation_a.Assign(PackBlendEquation(src.alpha_op)); factor_source_rgb.Assign(PackBlendFactor(src.color_source)); @@ -188,20 +188,13 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t factor_source_a.Assign(PackBlendFactor(src.alpha_source)); factor_dest_a.Assign(PackBlendFactor(src.alpha_dest)); enable.Assign(1); - return; - } + }; - if (!regs.blend.enable[index]) { + if (!regs.blend_per_target_enabled) { + setup_blend(regs.blend); return; } - const auto& src = regs.blend_per_target[index]; - equation_rgb.Assign(PackBlendEquation(src.color_op)); - equation_a.Assign(PackBlendEquation(src.alpha_op)); - factor_source_rgb.Assign(PackBlendFactor(src.color_source)); - factor_dest_rgb.Assign(PackBlendFactor(src.color_dest)); - factor_source_a.Assign(PackBlendFactor(src.alpha_source)); - factor_dest_a.Assign(PackBlendFactor(src.alpha_dest)); - enable.Assign(1); + setup_blend(regs.blend_per_target[index]); } void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { @@ -211,8 +204,6 @@ void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { packed_front_face = 1 - packed_front_face; } - raw1 = 0; - raw2 = 0; front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op.fail)); front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op.zfail)); front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op.zpass)); @@ -236,9 +227,37 @@ void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); cull_face.Assign(PackCullFace(regs.gl_cull_face)); cull_enable.Assign(regs.gl_cull_test_enabled != 0 ? 1 : 0); - std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) { - return static_cast<u16>(array.stride.Value()); - }); +} + +void FixedPipelineState::DynamicState::Refresh2(const Maxwell& regs, + Maxwell::PrimitiveTopology topology_, + bool base_feautures_supported) { + logic_op.Assign(PackLogicOp(regs.logic_op.op)); + + if (base_feautures_supported) { + return; + } + + const std::array enabled_lut{ + regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable, + }; + const u32 topology_index = static_cast<u32>(topology_); + + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); + primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); + depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); +} + +void FixedPipelineState::DynamicState::Refresh3(const Maxwell& regs) { + logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); + depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip == + Maxwell::ViewportClipControl::GeometryClip::Passthrough || + regs.viewport_clip_control.geometry_clip == + Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ || + regs.viewport_clip_control.geometry_clip == + Maxwell::ViewportClipControl::GeometryClip::FrustumZ); } size_t FixedPipelineState::Hash() const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index ab79fb8f3..98ea20b42 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -17,6 +17,15 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct DynamicFeatures { + bool has_extended_dynamic_state; + bool has_extended_dynamic_state_2; + bool has_extended_dynamic_state_2_extra; + bool has_extended_dynamic_state_3_blend; + bool has_extended_dynamic_state_3_enables; + bool has_dynamic_vertex_input; +}; + struct FixedPipelineState { static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept; static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept; @@ -133,6 +142,17 @@ struct FixedPipelineState { struct DynamicState { union { u32 raw1; + BitField<0, 2, u32> cull_face; + BitField<2, 1, u32> cull_enable; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> rasterize_enable; + BitField<6, 4, u32> logic_op; + BitField<10, 1, u32> logic_op_enable; + BitField<11, 1, u32> depth_clamp_disabled; + }; + union { + u32 raw2; StencilFace<0> front; StencilFace<12> back; BitField<24, 1, u32> stencil_enable; @@ -142,15 +162,11 @@ struct FixedPipelineState { BitField<28, 1, u32> front_face; BitField<29, 3, u32> depth_test_func; }; - union { - u32 raw2; - BitField<0, 2, u32> cull_face; - BitField<2, 1, u32> cull_enable; - }; - // Vertex stride is a 12 bits value, we have 4 bits to spare per element - std::array<u16, Maxwell::NumVertexArrays> vertex_strides; void Refresh(const Maxwell& regs); + void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology, + bool base_feautures_supported); + void Refresh3(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { return UnpackComparisonOp(depth_test_func); @@ -168,25 +184,24 @@ struct FixedPipelineState { union { u32 raw1; BitField<0, 1, u32> extended_dynamic_state; - BitField<1, 1, u32> dynamic_vertex_input; - BitField<2, 1, u32> xfb_enabled; - BitField<3, 1, u32> primitive_restart_enable; - BitField<4, 1, u32> depth_bias_enable; - BitField<5, 1, u32> depth_clamp_disabled; - BitField<6, 1, u32> ndc_minus_one_to_one; - BitField<7, 2, u32> polygon_mode; - BitField<9, 5, u32> patch_control_points_minus_one; - BitField<14, 2, u32> tessellation_primitive; - BitField<16, 2, u32> tessellation_spacing; - BitField<18, 1, u32> tessellation_clockwise; - BitField<19, 1, u32> logic_op_enable; - BitField<20, 4, u32> logic_op; + BitField<1, 1, u32> extended_dynamic_state_2; + BitField<2, 1, u32> extended_dynamic_state_2_extra; + BitField<3, 1, u32> extended_dynamic_state_3_blend; + BitField<4, 1, u32> extended_dynamic_state_3_enables; + BitField<5, 1, u32> dynamic_vertex_input; + BitField<6, 1, u32> xfb_enabled; + BitField<7, 1, u32> ndc_minus_one_to_one; + BitField<8, 2, u32> polygon_mode; + BitField<10, 2, u32> tessellation_primitive; + BitField<12, 2, u32> tessellation_spacing; + BitField<14, 1, u32> tessellation_clockwise; + BitField<15, 5, u32> patch_control_points_minus_one; + BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 1, u32> rasterize_enable; BitField<1, 3, u32> alpha_test_func; BitField<4, 1, u32> early_z; BitField<5, 1, u32> depth_enabled; @@ -197,25 +212,28 @@ struct FixedPipelineState { BitField<14, 1, u32> smooth_lines; BitField<15, 1, u32> alpha_to_coverage_enabled; BitField<16, 1, u32> alpha_to_one_enabled; + BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage; }; std::array<u8, Maxwell::NumRenderTargets> color_formats; u32 alpha_test_ref; u32 point_size; - std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; std::array<u16, Maxwell::NumViewports> viewport_swizzles; union { u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state u64 enabled_divisors; }; + + DynamicState dynamic_state; + std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; std::array<u32, Maxwell::NumVertexArrays> binding_divisors; + // Vertex stride is a 12 bits value, we have 4 bits to spare per element + std::array<u16, Maxwell::NumVertexArrays> vertex_strides; - DynamicState dynamic_state; VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, - bool has_dynamic_vertex_input); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features); size_t Hash() const noexcept; @@ -230,13 +248,17 @@ struct FixedPipelineState { // When transform feedback is enabled, use the whole struct return sizeof(*this); } - if (dynamic_vertex_input) { + if (dynamic_vertex_input && extended_dynamic_state_3_blend) { // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, dynamic_state); + } + if (dynamic_vertex_input) { + // Exclude dynamic state return offsetof(FixedPipelineState, attributes); } if (extended_dynamic_state) { // Exclude dynamic state - return offsetof(FixedPipelineState, dynamic_state); + return offsetof(FixedPipelineState, vertex_strides); } // Default return offsetof(FixedPipelineState, xfb_state); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 6b54d7111..487d8b416 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; if (device.IsExtTransformFeedbackSupported()) { flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } @@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() { if (device.IsExtTransformFeedbackSupported()) { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } + create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; null_buffer = device.GetLogical().CreateBuffer(create_info); if (device.HasDebuggingToolAttached()) { null_buffer.SetObjectNameEXT("Null buffer"); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 515d8d869..d11383bf1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -201,6 +201,22 @@ struct SimpleVertexSpec { static constexpr bool has_images = false; }; +struct SimpleStorageSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleImageSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = true; +}; + struct DefaultSpec { static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; static constexpr bool has_storage_buffers = true; @@ -211,7 +227,8 @@ struct DefaultSpec { ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules, const std::array<Shader::Info, NUM_STAGES>& infos) { - return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos); + return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, SimpleStorageSpec, SimpleImageSpec, + DefaultSpec>(modules, infos); } } // Anonymous namespace @@ -524,6 +541,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; + } else { + dynamic.raw1 = key.state.dynamic_state.raw1; } static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; @@ -561,7 +580,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast<u32>(index), - .stride = dynamic.vertex_strides[index], + .stride = key.state.vertex_strides[index], .inputRate = rate, }); if (instanced) { @@ -625,7 +644,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && + .primitiveRestartEnable = dynamic.primitive_restart_enable != 0 && ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && device.IsTopologyListPrimitiveRestartSupported()) || SupportsPrimitiveRestart(input_assembly_topology) || @@ -672,15 +691,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast<VkBool32>(dynamic.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast<VkBool32>(dynamic.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast<VkCullModeFlags>( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = key.state.depth_bias_enable, + .depthBiasEnable = (dynamic.depth_bias_enable == 0 ? VK_TRUE : VK_FALSE), .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -782,13 +801,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .logicOpEnable = key.state.logic_op_enable != 0, - .logicOp = static_cast<VkLogicOp>(key.state.logic_op.Value()), + .logicOpEnable = dynamic.logic_op_enable != 0, + .logicOp = static_cast<VkLogicOp>(dynamic.logic_op.Value()), .attachmentCount = static_cast<u32>(cb_attachments.size()), .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector<VkDynamicState, 19> dynamic_states{ + static_vector<VkDynamicState, 28> dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, @@ -811,6 +830,32 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + if (key.state.extended_dynamic_state_2) { + static constexpr std::array extended2{ + VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT, + VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT, + VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end()); + } + if (key.state.extended_dynamic_state_2_extra) { + dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT); + } + if (key.state.extended_dynamic_state_3_blend) { + static constexpr std::array extended3{ + VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT, + VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT, + VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end()); + } + if (key.state.extended_dynamic_state_3_enables) { + static constexpr std::array extended3{ + VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT, + VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end()); + } } const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e7262420c..3046b72ab 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 8; +constexpr u32 CACHE_VERSION = 10; template <typename Container> auto MakeSpan(Container& container) { @@ -351,6 +351,15 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device LOG_WARNING(Render_Vulkan, "maxVertexInputBindings is too low: {} < {}", device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays); } + + dynamic_features = DynamicFeatures{ + .has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(), + .has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(), + .has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(), + .has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(), + .has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(), + .has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(), + }; } PipelineCache::~PipelineCache() = default; @@ -362,8 +371,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(), - device.IsExtVertexInputDynamicStateSupported()); + graphics_key.state.Refresh(*maxwell3d, dynamic_features); if (current_pipeline) { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; @@ -439,14 +447,21 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; - const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); - const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { GraphicsPipelineCacheKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || - (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + if ((key.state.extended_dynamic_state != 0) != + dynamic_features.has_extended_dynamic_state || + (key.state.extended_dynamic_state_2 != 0) != + dynamic_features.has_extended_dynamic_state_2 || + (key.state.extended_dynamic_state_2_extra != 0) != + dynamic_features.has_extended_dynamic_state_2_extra || + (key.state.extended_dynamic_state_3_blend != 0) != + dynamic_features.has_extended_dynamic_state_3_blend || + (key.state.extended_dynamic_state_3_enables != 0) != + dynamic_features.has_extended_dynamic_state_3_enables || + (key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) { return; } workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 61f9e9366..b4f593ef5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -160,6 +160,7 @@ private: Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; + DynamicFeatures dynamic_features; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ac1eb9895..242bf9602 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -180,7 +180,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; -void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { +template <typename Func> +void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { MICROPROFILE_SCOPE(Vulkan_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -201,20 +202,67 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { UpdateDynamicStates(); - const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); - const u32 num_instances{instance_count}; - const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; - scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, - draw_params.first_index, draw_params.base_vertex, - draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); + draw_func(); + + EndTransformFeedback(); +} + +void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { + PrepareDraw(is_indexed, [this, is_indexed, instance_count] { + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + const u32 num_instances{instance_count}; + const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + }); +} + +void RasterizerVulkan::DrawIndirect() { + const auto& params = maxwell3d->draw_manager->GetIndirectParams(); + buffer_cache.SetDrawIndirect(¶ms); + PrepareDraw(params.is_indexed, [this, ¶ms] { + const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); + const auto& buffer = indirect_buffer.first; + const auto& offset = indirect_buffer.second; + if (params.include_count) { + const auto count = buffer_cache.GetDrawIndirectCount(); + const auto& draw_buffer = count.first; + const auto& offset_base = count.second; + scheduler.Record([draw_buffer_obj = draw_buffer->Handle(), + buffer_obj = buffer->Handle(), offset_base, offset, + params](vk::CommandBuffer cmdbuf) { + if (params.is_indexed) { + cmdbuf.DrawIndexedIndirectCount( + buffer_obj, offset, draw_buffer_obj, offset_base, + static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride)); + } else { + cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base, + static_cast<u32>(params.max_draw_counts), + static_cast<u32>(params.stride)); + } + }); + return; } + scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) { + if (params.is_indexed) { + cmdbuf.DrawIndexedIndirect(buffer_obj, offset, + static_cast<u32>(params.max_draw_counts), + static_cast<u32>(params.stride)); + } else { + cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts), + static_cast<u32>(params.stride)); + } + }); }); - EndTransformFeedback(); + buffer_cache.SetDrawIndirect(nullptr); } void RasterizerVulkan::Clear(u32 layer_count) { @@ -379,44 +427,58 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; } - { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - query_cache.FlushRegion(addr, size); + if ((True(which & VideoCommon::CacheType::QueryCache))) { + query_cache.FlushRegion(addr, size); + } } -bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { - std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; +bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { + if ((True(which & VideoCommon::CacheType::BufferCache))) { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.IsRegionGpuModified(addr, size)) { + return true; + } + } if (!Settings::IsGPULevelHigh()) { - return buffer_cache.IsRegionGpuModified(addr, size); + return false; + } + if (True(which & VideoCommon::CacheType::TextureCache)) { + std::scoped_lock lock{texture_cache.mutex}; + return texture_cache.IsRegionGpuModified(addr, size); } - return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.IsRegionGpuModified(addr, size); + return false; } -void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; } - { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - pipeline_cache.InvalidateRegion(addr, size); - query_cache.InvalidateRegion(addr, size); + if ((True(which & VideoCommon::CacheType::QueryCache))) { + query_cache.InvalidateRegion(addr, size); + } + if ((True(which & VideoCommon::CacheType::ShaderCache))) { + pipeline_cache.InvalidateRegion(addr, size); + } } void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { @@ -481,11 +543,12 @@ void RasterizerVulkan::ReleaseFences() { fence_manager.WaitPendingFences(); } -void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which) { if (Settings::IsGPULevelExtreme()) { - FlushRegion(addr, size); + FlushRegion(addr, size, which); } - InvalidateRegion(addr, size); + InvalidateRegion(addr, size, which); } void RasterizerVulkan::WaitForIdle() { @@ -541,6 +604,21 @@ void RasterizerVulkan::TickFrame() { } } +bool RasterizerVulkan::AccelerateConditionalRendering() { + if (Settings::IsGPULevelHigh()) { + // TODO(Blinkhawk): Reimplement Host conditional rendering. + return false; + } + // Medium / Low Hack: stub any checks on queries writen into the buffer cache. + const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; + Maxwell::ReportSemaphore::Compare cmp; + if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), + VideoCommon::CacheType::BufferCache)) { + return true; + } + return false; +} + bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { @@ -561,7 +639,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si } gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); { - std::unique_lock<std::mutex> lock{buffer_cache.mutex}; + std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex}; if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { buffer_cache.WriteMemory(*cpu_addr, copy_size); } @@ -639,16 +717,35 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); - UpdateDepthBoundsTestEnable(regs); - UpdateDepthTestEnable(regs); - UpdateDepthWriteEnable(regs); UpdateDepthCompareOp(regs); UpdateFrontFace(regs); UpdateStencilOp(regs); - UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { UpdateVertexInput(regs); } + + if (state_tracker.TouchStateEnable()) { + UpdateDepthBoundsTestEnable(regs); + UpdateDepthTestEnable(regs); + UpdateDepthWriteEnable(regs); + UpdateStencilTestEnable(regs); + if (device.IsExtExtendedDynamicState2Supported()) { + UpdatePrimitiveRestartEnable(regs); + UpdateRasterizerDiscardEnable(regs); + UpdateDepthBiasEnable(regs); + } + if (device.IsExtExtendedDynamicState3EnablesSupported()) { + UpdateLogicOpEnable(regs); + UpdateDepthClampEnable(regs); + } + } + if (device.IsExtExtendedDynamicState2ExtrasSupported()) { + UpdateLogicOp(regs); + } + if (device.IsExtExtendedDynamicState3Supported()) { + UpdateBlending(regs); + } } } @@ -789,32 +886,92 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) if (!state_tracker.TouchStencilProperties()) { return; } - if (regs.stencil_two_side_enable) { - // Separate values per face - scheduler.Record( - [front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask, - front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref, - back_write_mask = regs.stencil_back_mask, - back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { + bool update_references = state_tracker.TouchStencilReference(); + bool update_write_mask = state_tracker.TouchStencilWriteMask(); + bool update_compare_masks = state_tracker.TouchStencilCompare(); + if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) { + update_references = true; + update_write_mask = true; + update_compare_masks = true; + } + if (update_references) { + [&]() { + if (regs.stencil_two_side_enable) { + if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) && + !state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) { + return; + } + } else { + if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) { + return; + } + } + scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref, + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + const bool set_back = two_sided && front_ref != back_ref; // Front face - cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref); - cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask); - cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask); - - // Back face - cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); - cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); - cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); + cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT + : VK_STENCIL_FACE_FRONT_AND_BACK, + front_ref); + if (set_back) { + cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); + } }); - } else { - // Front face defines both faces - scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask, - test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) { - cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref); - cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask); - cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask); - }); + }(); + } + if (update_write_mask) { + [&]() { + if (regs.stencil_two_side_enable) { + if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) && + !state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) { + return; + } + } else { + if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) { + return; + } + } + scheduler.Record([front_write_mask = regs.stencil_front_mask, + back_write_mask = regs.stencil_back_mask, + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + const bool set_back = two_sided && front_write_mask != back_write_mask; + // Front face + cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT + : VK_STENCIL_FACE_FRONT_AND_BACK, + front_write_mask); + if (set_back) { + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); + } + }); + }(); + } + if (update_compare_masks) { + [&]() { + if (regs.stencil_two_side_enable) { + if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) && + !state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) { + return; + } + } else { + if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) { + return; + } + } + scheduler.Record([front_test_mask = regs.stencil_front_func_mask, + back_test_mask = regs.stencil_back_func_mask, + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + const bool set_back = two_sided && front_test_mask != back_test_mask; + // Front face + cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT + : VK_STENCIL_FACE_FRONT_AND_BACK, + front_test_mask); + if (set_back) { + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); + } + }); + }(); } + state_tracker.ClearStencilReset(); } void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -868,6 +1025,82 @@ void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& r }); } +void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchPrimitiveRestartEnable()) { + return; + } + scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveRestartEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchRasterizerDiscardEnable()) { + return; + } + scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0); + }); +} + +void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthBiasEnable()) { + return; + } + constexpr size_t POINT = 0; + constexpr size_t LINE = 1; + constexpr size_t POLYGON = 2; + static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { + POINT, // Points + LINE, // Lines + LINE, // LineLoop + LINE, // LineStrip + POLYGON, // Triangles + POLYGON, // TriangleStrip + POLYGON, // TriangleFan + POLYGON, // Quads + POLYGON, // QuadStrip + POLYGON, // Polygon + LINE, // LinesAdjacency + LINE, // LineStripAdjacency + POLYGON, // TrianglesAdjacency + POLYGON, // TriangleStripAdjacency + POLYGON, // Patches + }; + const std::array enabled_lut{ + regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable, + }; + const u32 topology_index = static_cast<u32>(maxwell3d->draw_manager->GetDrawState().topology); + const u32 enable = enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]]; + scheduler.Record( + [enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); }); +} + +void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLogicOpEnable()) { + return; + } + scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetLogicOpEnableEXT(enable != 0); + }); +} + +void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthClampEnable()) { + return; + } + bool is_enabled = !(regs.viewport_clip_control.geometry_clip == + Maxwell::ViewportClipControl::GeometryClip::Passthrough || + regs.viewport_clip_control.geometry_clip == + Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ || + regs.viewport_clip_control.geometry_clip == + Maxwell::ViewportClipControl::GeometryClip::FrustumZ); + scheduler.Record( + [is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); }); +} + void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchDepthCompareOp()) { return; @@ -925,6 +1158,78 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { } } +void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLogicOp()) { + return; + } + const auto op_value = static_cast<u32>(regs.logic_op.op); + auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500) + : VK_LOGIC_OP_NO_OP; + scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); }); +} + +void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchBlending()) { + return; + } + + if (state_tracker.TouchColorMask()) { + std::array<VkColorComponentFlags, Maxwell::NumRenderTargets> setup_masks{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) { + const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; + auto& current = setup_masks[index]; + if (mask.R) { + current |= VK_COLOR_COMPONENT_R_BIT; + } + if (mask.G) { + current |= VK_COLOR_COMPONENT_G_BIT; + } + if (mask.B) { + current |= VK_COLOR_COMPONENT_B_BIT; + } + if (mask.A) { + current |= VK_COLOR_COMPONENT_A_BIT; + } + } + scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) { + cmdbuf.SetColorWriteMaskEXT(0, setup_masks); + }); + } + + if (state_tracker.TouchBlendEnable()) { + std::array<VkBool32, Maxwell::NumRenderTargets> setup_enables{}; + std::ranges::transform( + regs.blend.enable, setup_enables.begin(), + [&](const auto& is_enabled) { return is_enabled != 0 ? VK_TRUE : VK_FALSE; }); + scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) { + cmdbuf.SetColorBlendEnableEXT(0, setup_enables); + }); + } + + if (state_tracker.TouchBlendEquations()) { + std::array<VkColorBlendEquationEXT, Maxwell::NumRenderTargets> setup_blends{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) { + const auto blend_setup = [&]<typename T>(const T& guest_blend) { + auto& host_blend = setup_blends[index]; + host_blend.srcColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_source); + host_blend.dstColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_dest); + host_blend.colorBlendOp = MaxwellToVK::BlendEquation(guest_blend.color_op); + host_blend.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_source); + host_blend.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_dest); + host_blend.alphaBlendOp = MaxwellToVK::BlendEquation(guest_blend.alpha_op); + }; + if (!regs.blend_per_target_enabled) { + blend_setup(regs.blend); + continue; + } + blend_setup(regs.blend_per_target[index]); + } + scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) { + cmdbuf.SetColorBlendEquationEXT(0, setup_blends); + }); + } +} + void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchStencilTestEnable()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ee483cfd9..c661e5b19 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -65,6 +65,7 @@ public: ~RasterizerVulkan() override; void Draw(bool is_indexed, u32 instance_count) override; + void DrawIndirect() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -72,9 +73,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -84,12 +88,14 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; + bool AccelerateConditionalRendering() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; @@ -114,6 +120,9 @@ private: static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); + template <typename Func> + void PrepareDraw(bool is_indexed, Func&&); + void FlushWork(); void UpdateDynamicStates(); @@ -135,9 +144,16 @@ private: void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 06f68d09a..74ca77216 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include <algorithm> #include <utility> @@ -94,7 +94,8 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -142,11 +143,23 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem StagingBufferPool::~StagingBufferPool() = default; -StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { - if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { +StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { + if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { return GetStreamBuffer(size); } - return GetStagingBuffer(size, usage); + return GetStagingBuffer(size, usage, deferred); +} + +void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) { + auto& entries = GetCache(ref.usage)[ref.log2_level].entries; + const auto is_this_one = [&ref](const StagingBuffer& entry) { + return entry.index == ref.index; + }; + auto it = std::find_if(entries.begin(), entries.end(), is_this_one); + ASSERT(it != entries.end()); + ASSERT(it->deferred); + it->tick = scheduler.CurrentTick(); + it->deferred = false; } void StagingBufferPool::TickFrame() { @@ -187,6 +200,9 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { .buffer = *stream_buffer, .offset = static_cast<VkDeviceSize>(offset), .mapped_span = std::span<u8>(stream_pointer + offset, size), + .usage{}, + .log2_level{}, + .index{}, }; } @@ -196,19 +212,21 @@ bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; }); }; -StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) { - if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { +StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage, + bool deferred) { + if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage, deferred)) { return *ref; } - return CreateStagingBuffer(size, usage); + return CreateStagingBuffer(size, usage, deferred); } std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, - MemoryUsage usage) { + MemoryUsage usage, + bool deferred) { StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; const auto is_free = [this](const StagingBuffer& entry) { - return scheduler.IsFree(entry.tick); + return !entry.deferred && scheduler.IsFree(entry.tick); }; auto& entries = cache_level.entries; const auto hint_it = entries.begin() + cache_level.iterate_index; @@ -220,11 +238,14 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s } } cache_level.iterate_index = std::distance(entries.begin(), it) + 1; - it->tick = scheduler.CurrentTick(); + it->tick = deferred ? std::numeric_limits<u64>::max() : scheduler.CurrentTick(); + ASSERT(!it->deferred); + it->deferred = deferred; return it->Ref(); } -StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) { +StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, + bool deferred) { const u32 log2 = Common::Log2Ceil64(size); vk::Buffer buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -233,7 +254,8 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage .size = 1ULL << log2, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -249,7 +271,11 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage .buffer = std::move(buffer), .commit = std::move(commit), .mapped_span = mapped_span, - .tick = scheduler.CurrentTick(), + .usage = usage, + .log2_level = log2, + .index = unique_ids++, + .tick = deferred ? std::numeric_limits<u64>::max() : scheduler.CurrentTick(), + .deferred = deferred, }); return entry.Ref(); } diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 91dc84da8..4fd15f11a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -20,6 +20,9 @@ struct StagingBufferRef { VkBuffer buffer; VkDeviceSize offset; std::span<u8> mapped_span; + MemoryUsage usage; + u32 log2_level; + u64 index; }; class StagingBufferPool { @@ -30,7 +33,8 @@ public: Scheduler& scheduler); ~StagingBufferPool(); - StagingBufferRef Request(size_t size, MemoryUsage usage); + StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); + void FreeDeferred(StagingBufferRef& ref); void TickFrame(); @@ -44,13 +48,20 @@ private: vk::Buffer buffer; MemoryCommit commit; std::span<u8> mapped_span; + MemoryUsage usage; + u32 log2_level; + u64 index; u64 tick = 0; + bool deferred{}; StagingBufferRef Ref() const noexcept { return { .buffer = *buffer, .offset = 0, .mapped_span = mapped_span, + .usage = usage, + .log2_level = log2_level, + .index = index, }; } }; @@ -68,11 +79,12 @@ private: bool AreRegionsActive(size_t region_begin, size_t region_end) const; - StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage); + StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false); - std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); + std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage, + bool deferred); - StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); + StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred); StagingBuffersCache& GetCache(MemoryUsage usage); @@ -99,6 +111,7 @@ private: size_t current_delete_level = 0; u64 buffer_index = 0; + u64 unique_ids{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index edb41b171..d56558a83 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -27,10 +27,37 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, - DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, - VertexBuffers, VertexInput, + Viewports, + Scissors, + DepthBias, + BlendConstants, + DepthBounds, + StencilProperties, + StencilReference, + StencilWriteMask, + StencilCompare, + LineWidth, + CullMode, + DepthBoundsEnable, + DepthTestEnable, + DepthWriteEnable, + DepthCompareOp, + FrontFace, + StencilOp, + StencilTestEnable, + VertexBuffers, + VertexInput, + StateEnable, + PrimitiveRestartEnable, + RasterizerDiscardEnable, + DepthBiasEnable, + LogicOpEnable, + DepthClampEnable, + LogicOp, + Blending, + ColorMask, + BlendEquations, + BlendEnable, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -75,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) { } void SetupDirtyStencilProperties(Tables& tables) { - auto& table = tables[0]; - table[OFF(stencil_two_side_enable)] = StencilProperties; - table[OFF(stencil_front_ref)] = StencilProperties; - table[OFF(stencil_front_mask)] = StencilProperties; - table[OFF(stencil_front_func_mask)] = StencilProperties; - table[OFF(stencil_back_ref)] = StencilProperties; - table[OFF(stencil_back_mask)] = StencilProperties; - table[OFF(stencil_back_func_mask)] = StencilProperties; + const auto setup = [&](size_t position, u8 flag) { + tables[0][position] = flag; + tables[1][position] = StencilProperties; + }; + tables[0][OFF(stencil_two_side_enable)] = StencilProperties; + setup(OFF(stencil_front_ref), StencilReference); + setup(OFF(stencil_front_mask), StencilWriteMask); + setup(OFF(stencil_front_func_mask), StencilCompare); + setup(OFF(stencil_back_ref), StencilReference); + setup(OFF(stencil_back_mask), StencilWriteMask); + setup(OFF(stencil_back_func_mask), StencilCompare); } void SetupDirtyLineWidth(Tables& tables) { @@ -96,16 +126,22 @@ void SetupDirtyCullMode(Tables& tables) { table[OFF(gl_cull_test_enabled)] = CullMode; } -void SetupDirtyDepthBoundsEnable(Tables& tables) { - tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable; -} - -void SetupDirtyDepthTestEnable(Tables& tables) { - tables[0][OFF(depth_test_enable)] = DepthTestEnable; -} - -void SetupDirtyDepthWriteEnable(Tables& tables) { - tables[0][OFF(depth_write_enabled)] = DepthWriteEnable; +void SetupDirtyStateEnable(Tables& tables) { + const auto setup = [&](size_t position, u8 flag) { + tables[0][position] = flag; + tables[1][position] = StateEnable; + }; + setup(OFF(depth_bounds_enable), DepthBoundsEnable); + setup(OFF(depth_test_enable), DepthTestEnable); + setup(OFF(depth_write_enabled), DepthWriteEnable); + setup(OFF(stencil_enable), StencilTestEnable); + setup(OFF(primitive_restart.enabled), PrimitiveRestartEnable); + setup(OFF(rasterize_enable), RasterizerDiscardEnable); + setup(OFF(polygon_offset_point_enable), DepthBiasEnable); + setup(OFF(polygon_offset_line_enable), DepthBiasEnable); + setup(OFF(polygon_offset_fill_enable), DepthBiasEnable); + setup(OFF(logic_op.enable), LogicOpEnable); + setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable); } void SetupDirtyDepthCompareOp(Tables& tables) { @@ -133,16 +169,22 @@ void SetupDirtyStencilOp(Tables& tables) { tables[1][OFF(stencil_two_side_enable)] = StencilOp; } -void SetupDirtyStencilTestEnable(Tables& tables) { - tables[0][OFF(stencil_enable)] = StencilTestEnable; -} - void SetupDirtyBlending(Tables& tables) { tables[0][OFF(color_mask_common)] = Blending; + tables[1][OFF(color_mask_common)] = ColorMask; tables[0][OFF(blend_per_target_enabled)] = Blending; + tables[1][OFF(blend_per_target_enabled)] = BlendEquations; FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending); + FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMask); FillBlock(tables[0], OFF(blend), NUM(blend), Blending); + FillBlock(tables[1], OFF(blend), NUM(blend), BlendEquations); + FillBlock(tables[1], OFF(blend.enable), NUM(blend.enable), BlendEnable); FillBlock(tables[0], OFF(blend_per_target), NUM(blend_per_target), Blending); + FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations); +} + +void SetupDirtySpecialOps(Tables& tables) { + tables[0][OFF(logic_op.op)] = LogicOp; } void SetupDirtyViewportSwizzles(Tables& tables) { @@ -185,17 +227,15 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) { SetupDirtyStencilProperties(tables); SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); - SetupDirtyDepthBoundsEnable(tables); - SetupDirtyDepthTestEnable(tables); - SetupDirtyDepthWriteEnable(tables); + SetupDirtyStateEnable(tables); SetupDirtyDepthCompareOp(tables); SetupDirtyFrontFace(tables); SetupDirtyStencilOp(tables); - SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); SetupDirtyViewportSwizzles(tables); SetupDirtyVertexAttributes(tables); SetupDirtyVertexBindings(tables); + SetupDirtySpecialOps(tables); } void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) { @@ -204,6 +244,8 @@ void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) { void StateTracker::InvalidateState() { flags->set(); + current_topology = INVALID_TOPOLOGY; + stencil_reset = true; } StateTracker::StateTracker() diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 2296dea60..8010ad26c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -35,6 +35,9 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + StencilReference, + StencilWriteMask, + StencilCompare, LineWidth, CullMode, @@ -45,8 +48,18 @@ enum : u8 { FrontFace, StencilOp, StencilTestEnable, + PrimitiveRestartEnable, + RasterizerDiscardEnable, + DepthBiasEnable, + StateEnable, + LogicOp, + LogicOpEnable, + DepthClampEnable, Blending, + BlendEnable, + BlendEquations, + ColorMask, ViewportSwizzles, Last, @@ -64,6 +77,7 @@ public: void InvalidateCommandBufferState() { (*flags) |= invalidation_flags; current_topology = INVALID_TOPOLOGY; + stencil_reset = true; } void InvalidateViewports() { @@ -103,6 +117,57 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchStencilReference() { + return Exchange(Dirty::StencilReference, false); + } + + bool TouchStencilWriteMask() { + return Exchange(Dirty::StencilWriteMask, false); + } + + bool TouchStencilCompare() { + return Exchange(Dirty::StencilCompare, false); + } + + template <typename T> + bool ExchangeCheck(T& old_value, T new_value) { + bool result = old_value != new_value; + old_value = new_value; + return result; + } + + bool TouchStencilSide(bool two_sided_stencil_new) { + return ExchangeCheck(two_sided_stencil, two_sided_stencil_new) || stencil_reset; + } + + bool CheckStencilReferenceFront(u32 new_value) { + return ExchangeCheck(front.ref, new_value) || stencil_reset; + } + + bool CheckStencilReferenceBack(u32 new_value) { + return ExchangeCheck(back.ref, new_value) || stencil_reset; + } + + bool CheckStencilWriteMaskFront(u32 new_value) { + return ExchangeCheck(front.write_mask, new_value) || stencil_reset; + } + + bool CheckStencilWriteMaskBack(u32 new_value) { + return ExchangeCheck(back.write_mask, new_value) || stencil_reset; + } + + bool CheckStencilCompareMaskFront(u32 new_value) { + return ExchangeCheck(front.compare_mask, new_value) || stencil_reset; + } + + bool CheckStencilCompareMaskBack(u32 new_value) { + return ExchangeCheck(back.compare_mask, new_value) || stencil_reset; + } + + void ClearStencilReset() { + stencil_reset = false; + } + bool TouchLineWidth() const { return Exchange(Dirty::LineWidth, false); } @@ -111,6 +176,10 @@ public: return Exchange(Dirty::CullMode, false); } + bool TouchStateEnable() { + return Exchange(Dirty::StateEnable, false); + } + bool TouchDepthBoundsTestEnable() { return Exchange(Dirty::DepthBoundsEnable, false); } @@ -123,6 +192,26 @@ public: return Exchange(Dirty::DepthWriteEnable, false); } + bool TouchPrimitiveRestartEnable() { + return Exchange(Dirty::PrimitiveRestartEnable, false); + } + + bool TouchRasterizerDiscardEnable() { + return Exchange(Dirty::RasterizerDiscardEnable, false); + } + + bool TouchDepthBiasEnable() { + return Exchange(Dirty::DepthBiasEnable, false); + } + + bool TouchLogicOpEnable() { + return Exchange(Dirty::LogicOpEnable, false); + } + + bool TouchDepthClampEnable() { + return Exchange(Dirty::DepthClampEnable, false); + } + bool TouchDepthCompareOp() { return Exchange(Dirty::DepthCompareOp, false); } @@ -135,10 +224,30 @@ public: return Exchange(Dirty::StencilOp, false); } + bool TouchBlending() { + return Exchange(Dirty::Blending, false); + } + + bool TouchBlendEnable() { + return Exchange(Dirty::BlendEnable, false); + } + + bool TouchBlendEquations() { + return Exchange(Dirty::BlendEquations, false); + } + + bool TouchColorMask() { + return Exchange(Dirty::ColorMask, false); + } + bool TouchStencilTestEnable() { return Exchange(Dirty::StencilTestEnable, false); } + bool TouchLogicOp() { + return Exchange(Dirty::LogicOp, false); + } + bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) { const bool has_changed = current_topology != new_topology; current_topology = new_topology; @@ -160,10 +269,20 @@ private: return is_dirty; } + struct StencilProperties { + u32 ref = 0; + u32 write_mask = 0; + u32 compare_mask = 0; + }; + Tegra::Engines::Maxwell3D::DirtyState::Flags* flags; Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; + bool two_sided_stencil = false; + StencilProperties front{}; + StencilProperties back{}; + bool stencil_reset = false; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index a65bbeb1c..d39372ec4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { return staging_buffer_pool.Request(size, MemoryUsage::Upload); } -StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return staging_buffer_pool.Request(size, MemoryUsage::Download); +StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { + return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred); +} + +void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { + staging_buffer_pool.FreeDeferred(ref); } bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 7ec0df134..1f27a3589 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -51,7 +51,9 @@ public: StagingBufferRef UploadStagingBuffer(size_t size); - StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); + + void FreeDeferredStagingBuffer(StagingBufferRef& ref); void TickFrame(); @@ -347,6 +349,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; @@ -354,6 +357,7 @@ struct TextureCacheParams { using ImageView = Vulkan::ImageView; using Sampler = Vulkan::Sampler; using Framebuffer = Vulkan::Framebuffer; + using AsyncBuffer = Vulkan::StagingBufferRef; }; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; |