summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp116
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h207
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp178
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp97
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h42
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp169
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp89
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp103
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h41
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp455
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h36
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp167
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp68
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h50
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp80
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp36
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h32
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp29
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h60
29 files changed, 1371 insertions, 789 deletions
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 568744e3c..d1f0ea932 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -39,53 +39,18 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
} // Anonymous namespace
-void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept {
- raw = 0;
- front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
- front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
- front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
- front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
- if (regs.stencil_two_side_enable) {
- back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
- back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
- back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
- back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
- } else {
- back.action_stencil_fail.Assign(front.action_stencil_fail);
- back.action_depth_fail.Assign(front.action_depth_fail);
- back.action_depth_pass.Assign(front.action_depth_pass);
- back.test_func.Assign(front.test_func);
- }
- depth_test_enable.Assign(regs.depth_test_enable);
- depth_write_enable.Assign(regs.depth_write_enabled);
- depth_bounds_enable.Assign(regs.depth_bounds_enable);
- stencil_enable.Assign(regs.stencil_enable);
- depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
-}
-
-void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
+void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
const auto& clip = regs.view_volume_clip_control;
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
- u32 packed_front_face = PackFrontFace(regs.front_face);
- if (regs.screen_y_control.triangle_rast_flip != 0 &&
- regs.viewport_transform[0].scale_y > 0.0f) {
- // Flip front face
- packed_front_face = 1 - packed_front_face;
- }
-
raw = 0;
- topology.Assign(topology_index);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
- cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
- cull_face.Assign(PackCullFace(regs.cull_face));
- front_face.Assign(packed_front_face);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value()));
@@ -94,19 +59,37 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
+
std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
-}
-void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept {
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ binding_divisors[index] =
+ regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
+ }
+
+ for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ const auto& input = regs.vertex_attrib_format[index];
+ auto& attribute = attributes[index];
+ attribute.raw = 0;
+ attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
+ attribute.buffer.Assign(input.buffer);
+ attribute.offset.Assign(input.offset);
+ attribute.type.Assign(static_cast<u32>(input.type.Value()));
+ attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ }
+
for (std::size_t index = 0; index < std::size(attachments); ++index) {
attachments[index].Fill(regs, index);
}
-}
-void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept {
const auto& transform = regs.viewport_transform;
- std::transform(transform.begin(), transform.end(), swizzles.begin(),
+ std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
[](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
+
+ if (!has_extended_dynamic_state) {
+ no_extended_dynamic_state.Assign(1);
+ dynamic_state.Fill(regs);
+ }
}
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
@@ -148,20 +131,57 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
enable.Assign(1);
}
-void FixedPipelineState::Fill(const Maxwell& regs) {
- rasterizer.Fill(regs);
- depth_stencil.Fill(regs);
- color_blending.Fill(regs);
- viewport_swizzles.Fill(regs);
+void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
+ const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
+ u32 packed_front_face = PackFrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
+ // Flip front face
+ packed_front_face = 1 - packed_front_face;
+ }
+
+ raw1 = 0;
+ raw2 = 0;
+ front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
+ front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
+ front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
+ front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
+ if (regs.stencil_two_side_enable) {
+ back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
+ back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
+ back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
+ back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
+ } else {
+ back.action_stencil_fail.Assign(front.action_stencil_fail);
+ back.action_depth_fail.Assign(front.action_depth_fail);
+ back.action_depth_pass.Assign(front.action_depth_pass);
+ back.test_func.Assign(front.test_func);
+ }
+ stencil_enable.Assign(regs.stencil_enable);
+ depth_write_enable.Assign(regs.depth_write_enabled);
+ depth_bounds_enable.Assign(regs.depth_bounds_enable);
+ depth_test_enable.Assign(regs.depth_test_enable);
+ front_face.Assign(packed_front_face);
+ depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
+ topology.Assign(topology_index);
+ cull_face.Assign(PackCullFace(regs.cull_face));
+ cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
+
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const auto& input = regs.vertex_array[index];
+ VertexBinding& binding = vertex_bindings[index];
+ binding.raw = 0;
+ binding.enabled.Assign(input.IsEnabled() ? 1 : 0);
+ binding.stride.Assign(static_cast<u16>(input.stride.Value()));
+ }
}
std::size_t FixedPipelineState::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+ const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
- return std::memcmp(this, &rhs, sizeof *this) == 0;
+ return std::memcmp(this, &rhs, Size()) == 0;
}
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 31a6398f2..cdcbb65f5 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -60,14 +60,6 @@ struct FixedPipelineState {
void Fill(const Maxwell& regs, std::size_t index);
- std::size_t Hash() const noexcept;
-
- bool operator==(const BlendingAttachment& rhs) const noexcept;
-
- bool operator!=(const BlendingAttachment& rhs) const noexcept {
- return !operator==(rhs);
- }
-
constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
@@ -97,156 +89,116 @@ struct FixedPipelineState {
}
};
- struct VertexInput {
- union Binding {
- u16 raw;
- BitField<0, 1, u16> enabled;
- BitField<1, 12, u16> stride;
- };
+ union VertexAttribute {
+ u32 raw;
+ BitField<0, 1, u32> enabled;
+ BitField<1, 5, u32> buffer;
+ BitField<6, 14, u32> offset;
+ BitField<20, 3, u32> type;
+ BitField<23, 6, u32> size;
- union Attribute {
- u32 raw;
- BitField<0, 1, u32> enabled;
- BitField<1, 5, u32> buffer;
- BitField<6, 14, u32> offset;
- BitField<20, 3, u32> type;
- BitField<23, 6, u32> size;
-
- constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
- return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
- }
-
- constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
- return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
- }
- };
-
- std::array<Binding, Maxwell::NumVertexArrays> bindings;
- std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
- std::array<Attribute, Maxwell::NumVertexAttributes> attributes;
-
- void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept {
- auto& binding = bindings[index];
- binding.raw = 0;
- binding.enabled.Assign(enabled ? 1 : 0);
- binding.stride.Assign(static_cast<u16>(stride));
- binding_divisors[index] = divisor;
+ constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
+ return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
- void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset,
- Maxwell::VertexAttribute::Type type,
- Maxwell::VertexAttribute::Size size) noexcept {
- auto& attribute = attributes[index];
- attribute.raw = 0;
- attribute.enabled.Assign(enabled ? 1 : 0);
- attribute.buffer.Assign(buffer);
- attribute.offset.Assign(offset);
- attribute.type.Assign(static_cast<u32>(type));
- attribute.size.Assign(static_cast<u32>(size));
+ constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
+ return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
- struct Rasterizer {
- union {
- u32 raw;
- BitField<0, 4, u32> topology;
- BitField<4, 1, u32> primitive_restart_enable;
- BitField<5, 1, u32> cull_enable;
- BitField<6, 1, u32> depth_bias_enable;
- BitField<7, 1, u32> depth_clamp_disabled;
- BitField<8, 1, u32> ndc_minus_one_to_one;
- BitField<9, 2, u32> cull_face;
- BitField<11, 1, u32> front_face;
- BitField<12, 2, u32> polygon_mode;
- BitField<14, 5, u32> patch_control_points_minus_one;
- BitField<19, 2, u32> tessellation_primitive;
- BitField<21, 2, u32> tessellation_spacing;
- BitField<23, 1, u32> tessellation_clockwise;
- BitField<24, 1, u32> logic_op_enable;
- BitField<25, 4, u32> logic_op;
- BitField<29, 1, u32> rasterize_enable;
- };
-
- // TODO(Rodrigo): Move this to push constants
- u32 point_size;
+ template <std::size_t Position>
+ union StencilFace {
+ BitField<Position + 0, 3, u32> action_stencil_fail;
+ BitField<Position + 3, 3, u32> action_depth_fail;
+ BitField<Position + 6, 3, u32> action_depth_pass;
+ BitField<Position + 9, 3, u32> test_func;
- void Fill(const Maxwell& regs) noexcept;
+ Maxwell::StencilOp ActionStencilFail() const noexcept {
+ return UnpackStencilOp(action_stencil_fail);
+ }
- constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
- return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
+ Maxwell::StencilOp ActionDepthFail() const noexcept {
+ return UnpackStencilOp(action_depth_fail);
}
- Maxwell::CullFace CullFace() const noexcept {
- return UnpackCullFace(cull_face.Value());
+ Maxwell::StencilOp ActionDepthPass() const noexcept {
+ return UnpackStencilOp(action_depth_pass);
}
- Maxwell::FrontFace FrontFace() const noexcept {
- return UnpackFrontFace(front_face.Value());
+ Maxwell::ComparisonOp TestFunc() const noexcept {
+ return UnpackComparisonOp(test_func);
}
};
- struct DepthStencil {
- template <std::size_t Position>
- union StencilFace {
- BitField<Position + 0, 3, u32> action_stencil_fail;
- BitField<Position + 3, 3, u32> action_depth_fail;
- BitField<Position + 6, 3, u32> action_depth_pass;
- BitField<Position + 9, 3, u32> test_func;
-
- Maxwell::StencilOp ActionStencilFail() const noexcept {
- return UnpackStencilOp(action_stencil_fail);
- }
-
- Maxwell::StencilOp ActionDepthFail() const noexcept {
- return UnpackStencilOp(action_depth_fail);
- }
-
- Maxwell::StencilOp ActionDepthPass() const noexcept {
- return UnpackStencilOp(action_depth_pass);
- }
-
- Maxwell::ComparisonOp TestFunc() const noexcept {
- return UnpackComparisonOp(test_func);
- }
- };
+ union VertexBinding {
+ u16 raw;
+ BitField<0, 12, u16> stride;
+ BitField<12, 1, u16> enabled;
+ };
+ struct DynamicState {
union {
- u32 raw;
+ u32 raw1;
StencilFace<0> front;
StencilFace<12> back;
- BitField<24, 1, u32> depth_test_enable;
+ BitField<24, 1, u32> stencil_enable;
BitField<25, 1, u32> depth_write_enable;
BitField<26, 1, u32> depth_bounds_enable;
- BitField<27, 1, u32> stencil_enable;
- BitField<28, 3, u32> depth_test_func;
+ BitField<27, 1, u32> depth_test_enable;
+ BitField<28, 1, u32> front_face;
+ BitField<29, 3, u32> depth_test_func;
+ };
+ union {
+ u32 raw2;
+ BitField<0, 4, u32> topology;
+ BitField<4, 2, u32> cull_face;
+ BitField<6, 1, u32> cull_enable;
};
+ std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings;
- void Fill(const Maxwell& regs) noexcept;
+ void Fill(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
}
- };
-
- struct ColorBlending {
- std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
- void Fill(const Maxwell& regs) noexcept;
- };
+ Maxwell::CullFace CullFace() const noexcept {
+ return UnpackCullFace(cull_face.Value());
+ }
- struct ViewportSwizzles {
- std::array<u16, Maxwell::NumViewports> swizzles;
+ Maxwell::FrontFace FrontFace() const noexcept {
+ return UnpackFrontFace(front_face.Value());
+ }
- void Fill(const Maxwell& regs) noexcept;
+ constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
+ return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
+ }
};
- VertexInput vertex_input;
- Rasterizer rasterizer;
- DepthStencil depth_stencil;
- ColorBlending color_blending;
- ViewportSwizzles viewport_swizzles;
+ union {
+ u32 raw;
+ BitField<0, 1, u32> no_extended_dynamic_state;
+ BitField<2, 1, u32> primitive_restart_enable;
+ BitField<3, 1, u32> depth_bias_enable;
+ BitField<4, 1, u32> depth_clamp_disabled;
+ BitField<5, 1, u32> ndc_minus_one_to_one;
+ BitField<6, 2, u32> polygon_mode;
+ BitField<8, 5, u32> patch_control_points_minus_one;
+ BitField<13, 2, u32> tessellation_primitive;
+ BitField<15, 2, u32> tessellation_spacing;
+ BitField<17, 1, u32> tessellation_clockwise;
+ BitField<18, 1, u32> logic_op_enable;
+ BitField<19, 4, u32> logic_op;
+ BitField<23, 1, u32> rasterize_enable;
+ };
+ u32 point_size;
+ std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
+ std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+ std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
+ std::array<u16, Maxwell::NumViewports> viewport_swizzles;
+ DynamicState dynamic_state;
- void Fill(const Maxwell& regs);
+ void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
std::size_t Hash() const noexcept;
@@ -255,6 +207,11 @@ struct FixedPipelineState {
bool operator!=(const FixedPipelineState& rhs) const noexcept {
return !operator==(rhs);
}
+
+ std::size_t Size() const noexcept {
+ const std::size_t total_size = sizeof *this;
+ return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
+ }
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 2871035f5..d7f1ae89f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
switch (filter) {
- case Tegra::Texture::TextureFilter::Linear:
- return VK_FILTER_LINEAR;
case Tegra::Texture::TextureFilter::Nearest:
return VK_FILTER_NEAREST;
+ case Tegra::Texture::TextureFilter::Linear:
+ return VK_FILTER_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+ UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
return {};
}
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
switch (mipmap_filter) {
case Tegra::Texture::TextureMipmapFilter::None:
- // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
- // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
- // use an image view with a single mipmap level to emulate this.
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
- ;
- case Tegra::Texture::TextureMipmapFilter::Linear:
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
+ // There are no Vulkan filter modes that directly correspond to OpenGL minification filters
+ // of GL_LINEAR or GL_NEAREST, but they can be emulated using
+ // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
+ // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
+ return VK_SAMPLER_MIPMAP_MODE_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+ UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
return {};
}
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
- default:
- UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
- return {};
}
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+ return {};
}
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -149,7 +148,7 @@ struct FormatTuple {
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
{VK_FORMAT_UNDEFINED}, // R16S
- {VK_FORMAT_UNDEFINED}, // R16UI
+ {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
{VK_FORMAT_UNDEFINED}, // R16I
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
@@ -288,14 +287,35 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
- default:
- UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
- return {};
}
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+ return {};
}
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
switch (type) {
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ }
+ break;
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -316,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
- default:
- break;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_UNORM;
+ return VK_FORMAT_R8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_UNORM;
+ return VK_FORMAT_R8G8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_UNORM;
+ return VK_FORMAT_R8G8B8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_UNORM;
+ return VK_FORMAT_R8G8B8A8_USCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_UNORM;
+ return VK_FORMAT_R16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_UNORM;
+ return VK_FORMAT_R16G16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_UNORM;
+ return VK_FORMAT_R16G16B16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_UNORM;
+ return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- default:
- break;
+ return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::SignedInt:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SINT;
+ return VK_FORMAT_R8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SINT;
+ return VK_FORMAT_R8G8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SINT;
+ return VK_FORMAT_R8G8B8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SINT;
+ return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SINT;
+ return VK_FORMAT_R16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SINT;
+ return VK_FORMAT_R16G16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SINT;
+ return VK_FORMAT_R16G16B16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SINT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32:
- return VK_FORMAT_R32G32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return VK_FORMAT_R32G32B32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
- return VK_FORMAT_R32G32B32A32_SINT;
- default:
- break;
+ return VK_FORMAT_R16G16B16A16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -400,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_UINT;
- default:
- break;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UINT_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_USCALED;
+ return VK_FORMAT_R8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_USCALED;
+ return VK_FORMAT_R8G8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_USCALED;
+ return VK_FORMAT_R8G8B8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_USCALED;
+ return VK_FORMAT_R8G8B8A8_SINT;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_USCALED;
+ return VK_FORMAT_R16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_USCALED;
+ return VK_FORMAT_R16G16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_USCALED;
+ return VK_FORMAT_R16G16B16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_USCALED;
- default:
- break;
+ return VK_FORMAT_R16G16B16A16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SINT_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::Float:
switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SSCALED;
+ return VK_FORMAT_R16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SSCALED;
+ return VK_FORMAT_R16G16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SSCALED;
+ return VK_FORMAT_R16G16B16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SSCALED;
- default:
- break;
- }
- break;
- case Maxwell::VertexAttribute::Type::Float:
- switch (size) {
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -458,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
- default:
- break;
}
break;
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 59b441943..2258479f5 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,6 +13,7 @@
#include <fmt/format.h>
#include "common/dynamic_library.h"
+#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
@@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
- std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+ const std::string filename =
+ FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
@@ -153,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
}
}
- static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"};
- vk::Span<const char*> layers = layers_data;
- if (!enable_layers) {
- layers = {};
+ std::vector<const char*> layers;
+ layers.reserve(1);
+ if (enable_layers) {
+ layers.push_back("VK_LAYER_KHRONOS_validation");
+ }
+
+ const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+ if (!layer_properties) {
+ LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+ layers.clear();
+ }
+
+ for (auto layer_it = layers.begin(); layer_it != layers.end();) {
+ const char* const layer = *layer_it;
+ const auto it = std::find_if(
+ layer_properties->begin(), layer_properties->end(),
+ [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
+ if (it == layer_properties->end()) {
+ LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+ layer_it = layers.erase(layer_it);
+ } else {
+ ++layer_it;
+ }
}
+
vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
if (!instance) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
@@ -387,7 +409,7 @@ bool RendererVulkan::PickDevices() {
return false;
}
- const s32 device_index = Settings::values.vulkan_device;
+ const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
return false;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5f33d9e40..2be38d419 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
} // Anonymous namespace
-CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
- VAddr cpu_addr, std::size_t size)
- : VideoCommon::BufferBlock{cpu_addr, size} {
+Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
+ VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
+ : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
VkBufferCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ci.pNext = nullptr;
@@ -54,46 +54,17 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
buffer.commit = memory_manager.Commit(buffer.handle, false);
}
-CachedBufferBlock::~CachedBufferBlock() = default;
+Buffer::~Buffer() = default;
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
- CreateStreamBuffer(device,
- scheduler)},
- device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
- staging_pool} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
-}
-
-VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
- return buffer->GetHandle();
-}
-
-VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
- size = std::max(size, std::size_t(4));
- const auto& empty = staging_pool.GetUnusedBuffer(size, false);
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, size, 0);
- });
- return *empty.handle;
-}
-
-void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) {
+void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
- size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
+
+ const VkBuffer handle = Handle();
+ scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
+ cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -102,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
+ barrier.buffer = handle;
barrier.offset = offset;
barrier.size = size;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -110,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
});
}
-void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) {
+void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
- size](vk::CommandBuffer cmdbuf) {
+
+ const VkBuffer handle = Handle();
+ scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
@@ -123,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
+ barrier.buffer = handle;
barrier.offset = offset;
barrier.size = size;
@@ -131,18 +102,20 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
- cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
+ cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
});
scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size);
}
-void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) {
+void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
- dst_offset, size](vk::CommandBuffer cmdbuf) {
+
+ const VkBuffer dst_buffer = Handle();
+ scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
+ size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
std::array<VkBufferMemoryBarrier, 2> barriers;
@@ -169,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
});
}
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+ : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
+ CreateStreamBuffer(device,
+ scheduler)},
+ device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+ staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+ return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
+ size);
+}
+
+VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
+ size = std::max(size, std::size_t(4));
+ const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, size, 0);
+ });
+ return {*empty.handle, 0, 0};
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..991ee451c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -24,22 +23,34 @@ class VKDevice;
class VKMemoryManager;
class VKScheduler;
-class CachedBufferBlock final : public VideoCommon::BufferBlock {
+class Buffer final : public VideoCommon::BufferBlock {
public:
- explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
- VAddr cpu_addr, std::size_t size);
- ~CachedBufferBlock();
+ explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
+ ~Buffer();
- VkBuffer GetHandle() const {
+ void Upload(std::size_t offset, std::size_t size, const u8* data);
+
+ void Download(std::size_t offset, std::size_t size, u8* data);
+
+ void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size);
+
+ VkBuffer Handle() const {
return *buffer.handle;
}
+ u64 Address() const {
+ return 0;
+ }
+
private:
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
+
VKBuffer buffer;
};
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@@ -47,21 +58,10 @@ public:
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
- VkBuffer GetEmptyBuffer(std::size_t size) override;
+ BufferInfo GetEmptyBuffer(std::size_t size) override;
protected:
- VkBuffer ToHandle(const Buffer& buffer) override;
-
- Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
-
- void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) override;
-
- void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) override;
-
- void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) override;
+ std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
private:
const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 8e1b46277..281bf9ac3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
};
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
+ add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
+ add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
VkDescriptorSetLayoutCreateInfo ci;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 890fd52cf..9259b618d 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
VkDescriptorPoolCreateInfo ci;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 750e5a0ca..fdaea4210 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
- static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32,
- VK_FORMAT_A8B8G8R8_UINT_PACK32,
- VK_FORMAT_A8B8G8R8_SNORM_PACK32,
- VK_FORMAT_A8B8G8R8_SRGB_PACK32,
- VK_FORMAT_B5G6R5_UNORM_PACK16,
- VK_FORMAT_A2B10G10R10_UNORM_PACK32,
- VK_FORMAT_A1R5G5B5_UNORM_PACK16,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R32G32B32A32_UINT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R32G32_UINT,
- VK_FORMAT_R16G16B16A16_UINT,
- VK_FORMAT_R16G16B16A16_SNORM,
- VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16_UNORM,
- VK_FORMAT_R16G16_SNORM,
- VK_FORMAT_R16G16_SFLOAT,
- VK_FORMAT_R16_UNORM,
- VK_FORMAT_R8G8B8A8_SRGB,
- VK_FORMAT_R8G8_UNORM,
- VK_FORMAT_R8G8_SNORM,
- VK_FORMAT_R8G8_UINT,
- VK_FORMAT_R8_UNORM,
- VK_FORMAT_R8_UINT,
- VK_FORMAT_B10G11R11_UFLOAT_PACK32,
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32_UINT,
- VK_FORMAT_R32_SINT,
- VK_FORMAT_R16_SFLOAT,
- VK_FORMAT_R16G16B16A16_SFLOAT,
- VK_FORMAT_B8G8R8A8_UNORM,
- VK_FORMAT_B8G8R8A8_SRGB,
- VK_FORMAT_R4G4B4A4_UNORM_PACK16,
- VK_FORMAT_D32_SFLOAT,
- VK_FORMAT_D16_UNORM,
- VK_FORMAT_D16_UNORM_S8_UINT,
- VK_FORMAT_D24_UNORM_S8_UINT,
- VK_FORMAT_D32_SFLOAT_S8_UINT,
- VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
- VK_FORMAT_BC2_UNORM_BLOCK,
- VK_FORMAT_BC3_UNORM_BLOCK,
- VK_FORMAT_BC4_UNORM_BLOCK,
- VK_FORMAT_BC5_UNORM_BLOCK,
- VK_FORMAT_BC5_SNORM_BLOCK,
- VK_FORMAT_BC7_UNORM_BLOCK,
- VK_FORMAT_BC6H_UFLOAT_BLOCK,
- VK_FORMAT_BC6H_SFLOAT_BLOCK,
- VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
- VK_FORMAT_BC2_SRGB_BLOCK,
- VK_FORMAT_BC3_SRGB_BLOCK,
- VK_FORMAT_BC7_SRGB_BLOCK,
- VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
- VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
- VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
- VK_FORMAT_E5B9G9R9_UFLOAT_PACK32};
+ static constexpr std::array formats{
+ VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_UINT_PACK32,
+ VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_SRGB_PACK32,
+ VK_FORMAT_B5G6R5_UNORM_PACK16,
+ VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_UINT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_R16G16B16A16_UINT,
+ VK_FORMAT_R16G16B16A16_SNORM,
+ VK_FORMAT_R16G16B16A16_UNORM,
+ VK_FORMAT_R16G16_UNORM,
+ VK_FORMAT_R16G16_SNORM,
+ VK_FORMAT_R16G16_SFLOAT,
+ VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16_UINT,
+ VK_FORMAT_R8G8B8A8_SRGB,
+ VK_FORMAT_R8G8_UNORM,
+ VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8_UINT,
+ VK_FORMAT_R8_UNORM,
+ VK_FORMAT_R8_UINT,
+ VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32_UINT,
+ VK_FORMAT_R32_SINT,
+ VK_FORMAT_R16_SFLOAT,
+ VK_FORMAT_R16G16B16A16_SFLOAT,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ VK_FORMAT_B8G8R8A8_SRGB,
+ VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+ VK_FORMAT_D32_SFLOAT,
+ VK_FORMAT_D16_UNORM,
+ VK_FORMAT_D16_UNORM_S8_UINT,
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+ VK_FORMAT_BC2_UNORM_BLOCK,
+ VK_FORMAT_BC3_UNORM_BLOCK,
+ VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC5_UNORM_BLOCK,
+ VK_FORMAT_BC5_SNORM_BLOCK,
+ VK_FORMAT_BC7_UNORM_BLOCK,
+ VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VK_FORMAT_BC6H_SFLOAT_BLOCK,
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+ VK_FORMAT_BC2_SRGB_BLOCK,
+ VK_FORMAT_BC3_SRGB_BLOCK,
+ VK_FORMAT_BC7_SRGB_BLOCK,
+ VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+ VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+ VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+ VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+ };
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.GetFormatProperties(format));
@@ -310,6 +313,16 @@ bool VKDevice::Create() {
LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
}
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
+ if (ext_extended_dynamic_state) {
+ dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
+ dynamic_state.pNext = nullptr;
+ dynamic_state.extendedDynamicState = VK_TRUE;
+ SetNext(next, dynamic_state);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -538,6 +551,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
+ bool has_ext_extended_dynamic_state{};
for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {
Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
Test(extension, khr_uniform_buffer_standard_layout,
@@ -555,6 +569,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
false);
Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME,
false);
+ Test(extension, has_ext_extended_dynamic_state,
+ VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostics_config,
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
@@ -640,6 +656,19 @@ std::vector<const char*> VKDevice::LoadExtensions() {
}
}
+ if (has_ext_extended_dynamic_state) {
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
+ dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
+ dynamic_state.pNext = nullptr;
+ features.pNext = &dynamic_state;
+ physical.GetFeatures2KHR(features);
+
+ if (dynamic_state.extendedDynamicState) {
+ extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+ ext_extended_dynamic_state = true;
+ }
+ }
+
return extensions;
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 6b9227b09..ae5c21baa 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -182,6 +182,11 @@ public:
return ext_custom_border_color;
}
+ /// Returns true if the device supports VK_EXT_extended_dynamic_state.
+ bool IsExtExtendedDynamicStateSupported() const {
+ return ext_extended_dynamic_state;
+ }
+
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -239,6 +244,7 @@ private:
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
// Telemetry parameters
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 69b6bba00..844445105 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -176,20 +176,32 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
- const auto& vi = fixed_state.vertex_input;
- const auto& ds = fixed_state.depth_stencil;
- const auto& cd = fixed_state.color_blending;
- const auto& rs = fixed_state.rasterizer;
- const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles;
+ const auto& state = fixed_state;
+ const auto& viewport_swizzles = state.viewport_swizzles;
+
+ FixedPipelineState::DynamicState dynamic;
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ // Insert dummy values, as long as they are valid they don't matter as extended dynamic
+ // state is ignored
+ dynamic.raw1 = 0;
+ dynamic.raw2 = 0;
+ for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) {
+ // Enable all vertex bindings
+ binding.raw = 0;
+ binding.enabled.Assign(1);
+ }
+ } else {
+ dynamic = state.dynamic_state;
+ }
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
- for (std::size_t index = 0; index < std::size(vi.bindings); ++index) {
- const auto& binding = vi.bindings[index];
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const auto& binding = dynamic.vertex_bindings[index];
if (!binding.enabled) {
continue;
}
- const bool instanced = vi.binding_divisors[index] != 0;
+ const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
auto& vertex_binding = vertex_bindings.emplace_back();
@@ -200,14 +212,14 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
if (instanced) {
auto& binding_divisor = vertex_binding_divisors.emplace_back();
binding_divisor.binding = static_cast<u32>(index);
- binding_divisor.divisor = vi.binding_divisors[index];
+ binding_divisor.divisor = state.binding_divisors[index];
}
}
std::vector<VkVertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
- for (std::size_t index = 0; index < std::size(vi.attributes); ++index) {
- const auto& attribute = vi.attributes[index];
+ for (std::size_t index = 0; index < state.attributes.size(); ++index) {
+ const auto& attribute = state.attributes[index];
if (!attribute.enabled) {
continue;
}
@@ -244,15 +256,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
input_assembly_ci.pNext = nullptr;
input_assembly_ci.flags = 0;
- input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology());
+ input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());
input_assembly_ci.primitiveRestartEnable =
- rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);
+ state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);
VkPipelineTessellationStateCreateInfo tessellation_ci;
tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
tessellation_ci.pNext = nullptr;
tessellation_ci.flags = 0;
- tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1;
+ tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1;
VkPipelineViewportStateCreateInfo viewport_ci;
viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
@@ -280,13 +292,13 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_ci.pNext = nullptr;
rasterization_ci.flags = 0;
- rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE;
- rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE;
+ rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE;
+ rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE;
rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
rasterization_ci.cullMode =
- rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE;
- rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace());
- rasterization_ci.depthBiasEnable = rs.depth_bias_enable;
+ dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE;
+ rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace());
+ rasterization_ci.depthBiasEnable = state.depth_bias_enable;
rasterization_ci.depthBiasConstantFactor = 0.0f;
rasterization_ci.depthBiasClamp = 0.0f;
rasterization_ci.depthBiasSlopeFactor = 0.0f;
@@ -307,14 +319,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
depth_stencil_ci.pNext = nullptr;
depth_stencil_ci.flags = 0;
- depth_stencil_ci.depthTestEnable = ds.depth_test_enable;
- depth_stencil_ci.depthWriteEnable = ds.depth_write_enable;
- depth_stencil_ci.depthCompareOp =
- ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS;
- depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable;
- depth_stencil_ci.stencilTestEnable = ds.stencil_enable;
- depth_stencil_ci.front = GetStencilFaceState(ds.front);
- depth_stencil_ci.back = GetStencilFaceState(ds.back);
+ depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable;
+ depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable;
+ depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable
+ ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
+ : VK_COMPARE_OP_ALWAYS;
+ depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable;
+ depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable;
+ depth_stencil_ci.front = GetStencilFaceState(dynamic.front);
+ depth_stencil_ci.back = GetStencilFaceState(dynamic.back);
depth_stencil_ci.minDepthBounds = 0.0f;
depth_stencil_ci.maxDepthBounds = 0.0f;
@@ -324,7 +337,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
static constexpr std::array COMPONENT_TABLE = {
VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT};
- const auto& blend = cd.attachments[index];
+ const auto& blend = state.attachments[index];
VkColorComponentFlags color_components = 0;
for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
@@ -354,11 +367,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
color_blend_ci.pAttachments = cb_attachments.data();
std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants));
- static constexpr std::array dynamic_states = {
+ std::vector dynamic_states = {
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE};
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ };
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ static constexpr std::array extended = {
+ VK_DYNAMIC_STATE_CULL_MODE_EXT,
+ VK_DYNAMIC_STATE_FRONT_FACE_EXT,
+ VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT,
+ VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_STENCIL_OP_EXT,
+ };
+ dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
+ }
VkPipelineDynamicStateCreateInfo dynamic_state_ci;
dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a5c7b7945..3da835324 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
@@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
u32 binding = base_binding;
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
- AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
+ AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
+ AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
return binding;
}
@@ -113,12 +116,12 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace
std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+ const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash);
}
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
- return std::memcmp(&rhs, this, sizeof *this) == 0;
+ return std::memcmp(&rhs, this, Size()) == 0;
}
std::size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
- GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
- u32 main_offset)
- : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
+ : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
-Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
- Core::System& system, Tegra::Engines::ShaderType stage) {
- if (stage == Tegra::Engines::ShaderType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
+ Tegra::Engines::ShaderType stage) {
+ if (stage == ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
@@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache)
- : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
- descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
- renderpass_cache{renderpass_cache} {}
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
+ scheduler{scheduler}, descriptor_pool{descriptor_pool},
+ update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default;
-std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
- std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- if (!shader) {
+
+ Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+ if (!result) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
- const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+ const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
+ stage_offset);
+ result = shader.get();
- shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
- std::move(code), stage_offset);
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, size_in_bytes);
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
}
- shaders[index] = std::move(shader);
+ shaders[index] = result;
}
return last_shaders = shaders;
}
@@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+ Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
- shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
- program_addr, *cpu_addr, std::move(code),
- KERNEL_MAIN_OFFSET);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
+ std::move(code), KERNEL_MAIN_OFFSET);
+ shader = shader_info.get();
+
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader_info), *cpu_addr, size_in_bytes);
} else {
- null_kernel = shader;
+ null_kernel = std::move(shader_info);
}
}
@@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
return *entry;
}
-void VKPipelineCache::Unregister(const Shader& shader) {
+void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
Finish();
it = compute_cache.erase(it);
}
-
- RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -305,16 +312,19 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
- if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) {
+ if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
+ device.IsExtExtendedDynamicStateSupported()) {
float point_size;
- std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float));
+ std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
specialization.point_size = point_size;
ASSERT(point_size != 0.0f);
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
- specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
+ const auto& attribute = fixed_state.attributes[i];
+ specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
+ specialization.attribute_types[i] = attribute.Type();
}
- specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
+ specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
SPIRVProgram program;
std::vector<VkDescriptorSetLayoutBinding> bindings;
@@ -328,12 +338,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- ASSERT(shader);
+ const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
- const auto program_type = GetShaderType(program_enum);
+ const ShaderType program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
@@ -375,16 +384,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
return;
}
- if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
- // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
- // crash.
+ if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
+ descriptor_type == STORAGE_TEXEL_BUFFER) {
+ // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
+ // Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) {
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
entry.dstBinding = binding + i;
entry.dstArrayElement = 0;
entry.descriptorCount = 1;
entry.descriptorType = descriptor_type;
- entry.offset = offset + i * entry_size;
+ entry.offset = static_cast<std::size_t>(offset + i * entry_size);
entry.stride = entry_size;
}
} else if (count > 0) {
@@ -405,8 +415,9 @@ void FillDescriptorUpdateTemplateEntries(
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
- AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
+ AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
+ AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
}
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a3fe65fb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace Core {
class System;
@@ -41,15 +41,13 @@ class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
-class CachedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
- FixedPipelineState fixed_state;
RenderPassParams renderpass_params;
+ u32 padding;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
- u64 padding; // This is necessary for unique object representations
+ FixedPipelineState fixed_state;
std::size_t Hash() const noexcept;
@@ -58,6 +56,10 @@ struct GraphicsPipelineCacheKey {
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
+
+ std::size_t Size() const noexcept {
+ return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+ }
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
@@ -102,21 +104,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
-class CachedShader final : public RasterizerCacheObject {
+class Shader {
public:
- explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
- u32 main_offset);
- ~CachedShader();
+ explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+ ~Shader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
- std::size_t GetSizeInBytes() const override {
- return program_code.size() * sizeof(u64);
- }
-
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
@@ -144,25 +141,23 @@ private:
ShaderEntries entries;
};
-class VKPipelineCache final : public RasterizerCache<Shader> {
+class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
- ~VKPipelineCache();
+ ~VKPipelineCache() override;
- std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+ std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
- void Unregister(const Shader& shader) override;
-
- void FlushObjectInner(const Shader& object) override {}
+ void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +170,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index be5b77fae..380ed532b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
}
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
for (std::size_t i = 0; i < std::size(addresses); ++i) {
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(tex_handle);
@@ -131,13 +143,65 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
}
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
+ if (!is_clear) {
+ return true;
+ }
+ // First we have to make sure all clear masks are enabled.
+ if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
+ !regs.clear_buffers.A) {
+ return true;
+ }
+ // If scissors are disabled, the whole screen is cleared
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Then we have to confirm scissor testing clears the whole image
+ const std::size_t index = regs.clear_buffers.RT;
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
+ scissor.max_y < regs.rt[index].height;
+}
+
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
+ // If we are not clearing, the contents have to be preserved
+ if (!is_clear) {
+ return true;
+ }
+ // For depth stencil clears we only have to confirm scissor test covers the whole image
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Make sure the clear cover the whole image
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
+ scissor.max_y < regs.zeta_height;
+}
+
+template <std::size_t N>
+std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
+ std::array<VkDeviceSize, N> expanded;
+ std::copy(strides.begin(), strides.end(), expanded.begin());
+ return expanded;
+}
+
} // Anonymous namespace
class BufferBindings final {
public:
- void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) {
+ void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
vertex.buffers[vertex.num_buffers] = buffer;
vertex.offsets[vertex.num_buffers] = offset;
+ vertex.sizes[vertex.num_buffers] = size;
+ vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
++vertex.num_buffers;
}
@@ -147,76 +211,76 @@ public:
index.type = type;
}
- void Bind(VKScheduler& scheduler) const {
+ void Bind(const VKDevice& device, VKScheduler& scheduler) const {
// Use this large switch case to avoid dispatching more memory in the record lambda than
// what we need. It looks horrible, but it's the best we can do on standard C++.
switch (vertex.num_buffers) {
case 0:
- return BindStatic<0>(scheduler);
+ return BindStatic<0>(device, scheduler);
case 1:
- return BindStatic<1>(scheduler);
+ return BindStatic<1>(device, scheduler);
case 2:
- return BindStatic<2>(scheduler);
+ return BindStatic<2>(device, scheduler);
case 3:
- return BindStatic<3>(scheduler);
+ return BindStatic<3>(device, scheduler);
case 4:
- return BindStatic<4>(scheduler);
+ return BindStatic<4>(device, scheduler);
case 5:
- return BindStatic<5>(scheduler);
+ return BindStatic<5>(device, scheduler);
case 6:
- return BindStatic<6>(scheduler);
+ return BindStatic<6>(device, scheduler);
case 7:
- return BindStatic<7>(scheduler);
+ return BindStatic<7>(device, scheduler);
case 8:
- return BindStatic<8>(scheduler);
+ return BindStatic<8>(device, scheduler);
case 9:
- return BindStatic<9>(scheduler);
+ return BindStatic<9>(device, scheduler);
case 10:
- return BindStatic<10>(scheduler);
+ return BindStatic<10>(device, scheduler);
case 11:
- return BindStatic<11>(scheduler);
+ return BindStatic<11>(device, scheduler);
case 12:
- return BindStatic<12>(scheduler);
+ return BindStatic<12>(device, scheduler);
case 13:
- return BindStatic<13>(scheduler);
+ return BindStatic<13>(device, scheduler);
case 14:
- return BindStatic<14>(scheduler);
+ return BindStatic<14>(device, scheduler);
case 15:
- return BindStatic<15>(scheduler);
+ return BindStatic<15>(device, scheduler);
case 16:
- return BindStatic<16>(scheduler);
+ return BindStatic<16>(device, scheduler);
case 17:
- return BindStatic<17>(scheduler);
+ return BindStatic<17>(device, scheduler);
case 18:
- return BindStatic<18>(scheduler);
+ return BindStatic<18>(device, scheduler);
case 19:
- return BindStatic<19>(scheduler);
+ return BindStatic<19>(device, scheduler);
case 20:
- return BindStatic<20>(scheduler);
+ return BindStatic<20>(device, scheduler);
case 21:
- return BindStatic<21>(scheduler);
+ return BindStatic<21>(device, scheduler);
case 22:
- return BindStatic<22>(scheduler);
+ return BindStatic<22>(device, scheduler);
case 23:
- return BindStatic<23>(scheduler);
+ return BindStatic<23>(device, scheduler);
case 24:
- return BindStatic<24>(scheduler);
+ return BindStatic<24>(device, scheduler);
case 25:
- return BindStatic<25>(scheduler);
+ return BindStatic<25>(device, scheduler);
case 26:
- return BindStatic<26>(scheduler);
+ return BindStatic<26>(device, scheduler);
case 27:
- return BindStatic<27>(scheduler);
+ return BindStatic<27>(device, scheduler);
case 28:
- return BindStatic<28>(scheduler);
+ return BindStatic<28>(device, scheduler);
case 29:
- return BindStatic<29>(scheduler);
+ return BindStatic<29>(device, scheduler);
case 30:
- return BindStatic<30>(scheduler);
+ return BindStatic<30>(device, scheduler);
case 31:
- return BindStatic<31>(scheduler);
+ return BindStatic<31>(device, scheduler);
case 32:
- return BindStatic<32>(scheduler);
+ return BindStatic<32>(device, scheduler);
}
UNREACHABLE();
}
@@ -227,6 +291,8 @@ private:
std::size_t num_buffers = 0;
std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
+ std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
+ std::array<u16, Maxwell::NumVertexArrays> strides;
} vertex;
struct {
@@ -236,15 +302,23 @@ private:
} index;
template <std::size_t N>
- void BindStatic(VKScheduler& scheduler) const {
- if (index.buffer) {
- BindStatic<N, true>(scheduler);
+ void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ if (index.buffer) {
+ BindStatic<N, true, true>(scheduler);
+ } else {
+ BindStatic<N, false, true>(scheduler);
+ }
} else {
- BindStatic<N, false>(scheduler);
+ if (index.buffer) {
+ BindStatic<N, true, false>(scheduler);
+ } else {
+ BindStatic<N, false, false>(scheduler);
+ }
}
}
- template <std::size_t N, bool is_indexed>
+ template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state>
void BindStatic(VKScheduler& scheduler) const {
static_assert(N <= Maxwell::NumVertexArrays);
if constexpr (N == 0) {
@@ -256,6 +330,31 @@ private:
std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
+ if constexpr (has_extended_dynamic_state) {
+ // With extended dynamic states we can specify the length and stride of a vertex buffer
+ // std::array<VkDeviceSize, N> sizes;
+ std::array<u16, N> strides;
+ // std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
+ std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
+
+ if constexpr (is_indexed) {
+ scheduler.Record(
+ [buffers, offsets, strides, index = index](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
+ cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
+ offsets.data(), nullptr,
+ ExpandStrides(strides).data());
+ });
+ } else {
+ scheduler.Record([buffers, offsets, strides](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
+ offsets.data(), nullptr,
+ ExpandStrides(strides).data());
+ });
+ }
+ return;
+ }
+
if constexpr (is_indexed) {
// Indexed draw
scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
@@ -314,7 +413,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key;
- key.fixed_state.Fill(gpu.regs);
+ key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
@@ -332,7 +431,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Unmap();
- const Texceptions texceptions = UpdateAttachments();
+ const Texceptions texceptions = UpdateAttachments(false);
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
key.renderpass_params = GetRenderPassParams(texceptions);
@@ -347,7 +446,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
UpdateDynamicStates();
- buffer_bindings.Bind(scheduler);
+ buffer_bindings.Bind(device, scheduler);
BeginTransformFeedback();
@@ -388,7 +487,7 @@ void RasterizerVulkan::Clear() {
return;
}
- [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
DEBUG_ASSERT(texceptions.none());
SetupImageTransitions(0, color_attachments, zeta_attachment);
@@ -468,8 +567,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const auto& entries = pipeline.GetEntries();
SetupComputeConstBuffers(entries);
SetupComputeGlobalBuffers(entries);
- SetupComputeTexelBuffers(entries);
+ SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
+ SetupComputeStorageTexels(entries);
SetupComputeImages(entries);
buffer_cache.Unmap();
@@ -664,9 +764,12 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
+RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& dirty = system.GPU().Maxwell3D().dirty.flags;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ auto& dirty = maxwell3d.dirty.flags;
+ auto& regs = maxwell3d.regs;
+
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -675,7 +778,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+ const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -683,7 +787,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
- zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+ const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
+ zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -715,7 +820,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
if (!view) {
return false;
}
- key.views.push_back(view->GetHandle());
+ key.views.push_back(view->GetAttachment());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
key.layers = std::min(key.layers, view->GetNumLayers());
@@ -761,7 +866,7 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
const auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
- SetupVertexArrays(fixed_state.vertex_input, buffer_bindings);
+ SetupVertexArrays(buffer_bindings);
const u32 base_instance = regs.vb_base_instance;
const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
@@ -775,20 +880,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
}
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
texture_cache.GuardSamplers(true);
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
// Skip VertexA stage
- const auto& shader = shaders[stage + 1];
+ Shader* const shader = shaders[stage + 1];
if (!shader) {
continue;
}
const auto& entries = shader->GetEntries();
SetupGraphicsConstBuffers(entries, stage);
SetupGraphicsGlobalBuffers(entries, stage);
- SetupGraphicsTexelBuffers(entries, stage);
+ SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
+ SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
}
texture_cache.GuardSamplers(false);
@@ -831,6 +937,17 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateBlendConstants(regs);
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ UpdateCullMode(regs);
+ UpdateDepthBoundsTestEnable(regs);
+ UpdateDepthTestEnable(regs);
+ UpdateDepthWriteEnable(regs);
+ UpdateDepthCompareOp(regs);
+ UpdateFrontFace(regs);
+ UpdatePrimitiveTopology(regs);
+ UpdateStencilOp(regs);
+ UpdateStencilTestEnable(regs);
+ }
}
void RasterizerVulkan::BeginTransformFeedback() {
@@ -838,6 +955,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
+ if (!device.IsExtTransformFeedbackSupported()) {
+ LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
+ return;
+ }
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
@@ -852,10 +973,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
- const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
@@ -866,50 +987,33 @@ void RasterizerVulkan::EndTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
+ if (!device.IsExtTransformFeedbackSupported()) {
+ return;
+ }
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
- BufferBindings& buffer_bindings) {
+void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs;
- for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
- const auto& attrib = regs.vertex_attrib_format[index];
- if (!attrib.IsValid()) {
- vertex_input.SetAttribute(index, false, 0, 0, {}, {});
- continue;
- }
-
- [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
- ASSERT(buffer.IsEnabled());
-
- vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
- attrib.size.Value());
- }
-
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) {
- vertex_input.SetBinding(index, false, 0, 0);
continue;
}
- vertex_input.SetBinding(
- index, true, vertex_array.stride,
- regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
-
const GPUVAddr start{vertex_array.StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
ASSERT(end >= start);
- const std::size_t size{end - start};
+ const std::size_t size = end - start;
if (size == 0) {
- buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
+ buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
continue;
}
- const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
- buffer_bindings.AddVertexBinding(buffer, offset);
+ const auto info = buffer_cache.UploadMemory(start, size);
+ buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
}
}
@@ -931,7 +1035,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ VkBuffer buffer = info.handle;
+ u64 offset = info.offset;
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
@@ -945,7 +1051,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ VkBuffer buffer = info.handle;
+ u64 offset = info.offset;
auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -980,12 +1088,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
}
}
-void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
+void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
- for (const auto& entry : entries.texel_buffers) {
+ for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, stage).tic;
- SetupTexelBuffer(image, entry);
+ SetupUniformTexels(image, entry);
}
}
@@ -1000,6 +1108,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
}
}
+void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().Maxwell3D();
+ for (const auto& entry : entries.storage_texels) {
+ const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ SetupStorageTexel(image, entry);
+ }
+}
+
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().Maxwell3D();
@@ -1032,12 +1149,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
}
}
-void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
+void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
- for (const auto& entry : entries.texel_buffers) {
+ for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
- SetupTexelBuffer(image, entry);
+ SetupUniformTexels(image, entry);
}
}
@@ -1052,6 +1169,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
}
}
+void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().KeplerCompute();
+ for (const auto& entry : entries.storage_texels) {
+ const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ SetupStorageTexel(image, entry);
+ }
+}
+
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().KeplerCompute();
@@ -1074,10 +1200,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
ASSERT(size <= MaxConstbufferSize);
- const auto [buffer_handle, offset] =
+ const auto info =
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
-
- update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
}
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1091,18 +1216,18 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
// default buffer.
static constexpr std::size_t dummy_size = 4;
- const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
- update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
+ const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
return;
}
- const auto [buffer, offset] = buffer_cache.UploadMemory(
+ const auto info = buffer_cache.UploadMemory(
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
- update_descriptor_queue.AddBuffer(buffer, offset, size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
}
-void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
- const TexelBufferEntry& entry) {
+void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
+ const UniformTexelEntry& entry) {
const auto view = texture_cache.GetTextureSurface(tic, entry);
ASSERT(view->IsBufferView());
@@ -1114,16 +1239,24 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
ASSERT(!view->IsBufferView());
- const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
+ const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
const auto sampler = sampler_cache.GetSampler(texture.tsc);
update_descriptor_queue.AddSampledImage(sampler, image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
sampled_views.push_back(ImageView{std::move(view), image_layout});
}
+void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
+ const StorageTexelEntry& entry) {
+ const auto view = texture_cache.GetImageSurface(tic, entry);
+ ASSERT(view->IsBufferView());
+
+ update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
+}
+
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
auto view = texture_cache.GetImageSurface(tic, entry);
@@ -1133,10 +1266,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
UNIMPLEMENTED_IF(tic.IsBuffer());
- const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ const VkImageView image_view =
+ view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
update_descriptor_queue.AddImage(image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_GENERAL;
image_views.push_back(ImageView{std::move(view), image_layout});
}
@@ -1231,6 +1365,117 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
}
}
+void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchCullMode()) {
+ return;
+ }
+ scheduler.Record(
+ [enabled = regs.cull_test_enabled, cull_face = regs.cull_face](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetCullModeEXT(enabled ? MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthBoundsTestEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthBoundsTestEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthTestEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthTestEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthWriteEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthWriteEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthCompareOp()) {
+ return;
+ }
+ scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func));
+ });
+}
+
+void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchFrontFace()) {
+ return;
+ }
+
+ VkFrontFace front_face = MaxwellToVK::FrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
+ front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_COUNTER_CLOCKWISE
+ : VK_FRONT_FACE_CLOCKWISE;
+ }
+ scheduler.Record(
+ [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); });
+}
+
+void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchPrimitiveTopology()) {
+ return;
+ }
+ const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
+ scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
+ });
+}
+
+void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchStencilOp()) {
+ return;
+ }
+ const Maxwell::StencilOp fail = regs.stencil_front_op_fail;
+ const Maxwell::StencilOp zfail = regs.stencil_front_op_zfail;
+ const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
+ const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
+ if (regs.stencil_two_side_enable) {
+ scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
+ MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+ MaxwellToVK::ComparisonOp(compare));
+ });
+ } else {
+ const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
+ const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
+ const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
+ const Maxwell::ComparisonOp back_compare = regs.stencil_back_func_func;
+ scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass,
+ back_compare](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail),
+ MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+ MaxwellToVK::ComparisonOp(compare));
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail),
+ MaxwellToVK::StencilOp(back_zpass),
+ MaxwellToVK::StencilOp(back_zfail),
+ MaxwellToVK::ComparisonOp(back_compare));
+ });
+ }
+}
+
+void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchStencilTestEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilTestEnableEXT(enable);
+ });
+}
+
std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
std::size_t size = CalculateVertexArraysSize();
if (is_indexed) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0ed0e48c6..923178b0b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
void FlushWork();
- Texceptions UpdateAttachments();
+ /// @brief Updates the currently bound attachments
+ /// @param is_clear True when the framebuffer is updated as a clear
+ /// @return Bitfield of attachments being used as sampled textures
+ Texceptions UpdateAttachments(bool is_clear);
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
@@ -168,7 +171,7 @@ private:
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupImageTransitions(Texceptions texceptions,
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
@@ -182,8 +185,7 @@ private:
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
- void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
- BufferBindings& buffer_bindings);
+ void SetupVertexArrays(BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
@@ -193,12 +195,15 @@ private:
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
- /// Setup texel buffers in the graphics pipeline.
- void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
+ /// Setup uniform texels in the graphics pipeline.
+ void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
+ /// Setup storage texels in the graphics pipeline.
+ void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
+
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
@@ -209,11 +214,14 @@ private:
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup texel buffers in the compute pipeline.
- void SetupComputeTexelBuffers(const ShaderEntries& entries);
+ void SetupComputeUniformTexels(const ShaderEntries& entries);
/// Setup textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries);
+ /// Setup storage texels in the compute pipeline.
+ void SetupComputeStorageTexels(const ShaderEntries& entries);
+
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
@@ -222,10 +230,12 @@ private:
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
- void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
+ void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
+ void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
+
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -235,6 +245,16 @@ private:
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
std::size_t CalculateComputeStreamBufferSize() const;
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index e6f2fa553..616eacc36 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,6 +9,8 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/textures/texture.h"
+using Tegra::Texture::TextureMipmapFilter;
+
namespace Vulkan {
namespace {
@@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
ci.compareEnable = tsc.depth_compare_enabled;
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
- ci.minLod = tsc.GetMinLod();
- ci.maxLod = tsc.GetMaxLod();
+ ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
+ ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
ci.unnormalizedCoordinates = VK_FALSE;
return device.GetLogical().CreateSampler(ci);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e..56524e6f3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,6 +9,7 @@
#include <utility>
#include "common/microprofile.h"
+#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
}
void VKScheduler::WorkerThread() {
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
std::unique_lock lock{mutex};
do {
cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 6f6dedd82..97429cc59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -400,8 +400,9 @@ private:
u32 binding = specialization.base_binding;
binding = DeclareConstantBuffers(binding);
binding = DeclareGlobalBuffers(binding);
- binding = DeclareTexelBuffers(binding);
+ binding = DeclareUniformTexels(binding);
binding = DeclareSamplers(binding);
+ binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding);
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
@@ -741,8 +742,10 @@ private:
if (!IsGenericAttribute(index)) {
continue;
}
-
const u32 location = GetGenericAttributeLocation(index);
+ if (!IsAttributeEnabled(location)) {
+ continue;
+ }
const auto type_descriptor = GetAttributeType(location);
Id type;
if (IsInputAttributeArray()) {
@@ -887,7 +890,7 @@ private:
return binding;
}
- u32 DeclareTexelBuffers(u32 binding) {
+ u32 DeclareUniformTexels(u32 binding) {
for (const auto& sampler : ir.GetSamplers()) {
if (!sampler.is_buffer) {
continue;
@@ -908,7 +911,7 @@ private:
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
+ uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
}
return binding;
}
@@ -943,31 +946,48 @@ private:
return binding;
}
- u32 DeclareImages(u32 binding) {
+ u32 DeclareStorageTexels(u32 binding) {
for (const auto& image : ir.GetImages()) {
- const auto [dim, arrayed] = GetImageDim(image);
- constexpr int depth = 0;
- constexpr bool ms = false;
- constexpr int sampled = 2; // This won't be accessed with a sampler
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- if (image.is_read && !image.is_written) {
- Decorate(id, spv::Decoration::NonWritable);
- } else if (image.is_written && !image.is_read) {
- Decorate(id, spv::Decoration::NonReadable);
+ if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
+ continue;
}
+ DeclareImage(image, binding);
+ }
+ return binding;
+ }
- images.emplace(image.index, StorageImage{image_type, id});
+ u32 DeclareImages(u32 binding) {
+ for (const auto& image : ir.GetImages()) {
+ if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
+ continue;
+ }
+ DeclareImage(image, binding);
}
return binding;
}
+ void DeclareImage(const Image& image, u32& binding) {
+ const auto [dim, arrayed] = GetImageDim(image);
+ constexpr int depth = 0;
+ constexpr bool ms = false;
+ constexpr int sampled = 2; // This won't be accessed with a sampler
+ const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
+ const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
+ const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
+ const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
+ AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
+
+ Decorate(id, spv::Decoration::Binding, binding++);
+ Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+ if (image.is_read && !image.is_written) {
+ Decorate(id, spv::Decoration::NonWritable);
+ } else if (image.is_written && !image.is_read) {
+ Decorate(id, spv::Decoration::NonReadable);
+ }
+
+ images.emplace(image.index, StorageImage{image_type, id});
+ }
+
bool IsRenderTargetEnabled(u32 rt) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
@@ -986,6 +1006,10 @@ private:
return stage == ShaderType::TesselationControl;
}
+ bool IsAttributeEnabled(u32 location) const {
+ return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
+ }
+
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
@@ -1201,16 +1225,20 @@ private:
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
return {v_float_zero, Type::Float};
default:
- if (IsGenericAttribute(attribute)) {
- const u32 location = GetGenericAttributeLocation(attribute);
- const auto type_descriptor = GetAttributeType(location);
- const Type type = type_descriptor.type;
- const Id attribute_id = input_attributes.at(attribute);
- const std::vector elements = {element};
- const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
- return {OpLoad(GetTypeDefinition(type), pointer), type};
+ if (!IsGenericAttribute(attribute)) {
+ break;
}
- break;
+ const u32 location = GetGenericAttributeLocation(attribute);
+ if (!IsAttributeEnabled(location)) {
+ // Disabled attributes (also known as constant attributes) always return zero.
+ return {v_float_zero, Type::Float};
+ }
+ const auto type_descriptor = GetAttributeType(location);
+ const Type type = type_descriptor.type;
+ const Id attribute_id = input_attributes.at(attribute);
+ const std::vector elements = {element};
+ const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
+ return {OpLoad(GetTypeDefinition(type), pointer), type};
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
return {v_float_zero, Type::Float};
@@ -1246,7 +1274,7 @@ private:
} else {
UNREACHABLE_MSG("Unmanaged offset node type");
}
- pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
+ pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
buffer_element);
}
return {OpLoad(t_float, pointer), Type::Float};
@@ -1601,7 +1629,7 @@ private:
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
const Id carry = OpCompositeExtract(t_uint, result, 1);
- return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
+ return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
}
Expression LogicalAssign(Operation operation) {
@@ -1664,7 +1692,7 @@ private:
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 index = meta.sampler.index;
if (meta.sampler.is_buffer) {
- const auto& entry = texel_buffers.at(index);
+ const auto& entry = uniform_texels.at(index);
return OpLoad(entry.image_type, entry.image);
} else {
const auto& entry = sampled_images.at(index);
@@ -1941,39 +1969,20 @@ private:
return {};
}
- Expression AtomicImageAdd(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageMin(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageMax(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageAnd(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageOr(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
+ template <Id (Module::*func)(Id, Id, Id, Id, Id)>
+ Expression AtomicImage(Operation operation) {
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+ ASSERT(meta.values.size() == 1);
- Expression AtomicImageXor(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
+ const Id coordinate = GetCoordinates(operation, Type::Int);
+ const Id image = images.at(meta.image.index).image;
+ const Id sample = v_uint_zero;
+ const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
- Expression AtomicImageExchange(Operation operation) {
- UNIMPLEMENTED();
- return {};
+ const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
+ const Id semantics = v_uint_zero;
+ const Id value = AsUint(Visit(meta.values[0]));
+ return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
@@ -1988,7 +1997,7 @@ private:
return {v_float_zero, Type::Float};
}
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = Constant(t_uint, 0);
+ const Id semantics = v_uint_zero;
const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
@@ -2612,11 +2621,11 @@ private:
&SPIRVDecompiler::ImageLoad,
&SPIRVDecompiler::ImageStore,
- &SPIRVDecompiler::AtomicImageAdd,
- &SPIRVDecompiler::AtomicImageAnd,
- &SPIRVDecompiler::AtomicImageOr,
- &SPIRVDecompiler::AtomicImageXor,
- &SPIRVDecompiler::AtomicImageExchange,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
@@ -2758,8 +2767,11 @@ private:
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
+ const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
+
const Id v_float_zero = Constant(t_float, 0.0f);
const Id v_float_one = Constant(t_float, 1.0f);
+ const Id v_uint_zero = Constant(t_uint, 0);
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
const Id v_varying_default =
@@ -2784,15 +2796,16 @@ private:
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
- std::map<u32, TexelBuffer> texel_buffers;
+ std::map<u32, TexelBuffer> uniform_texels;
std::map<u32, SampledImage> sampled_images;
+ std::map<u32, TexelBuffer> storage_texels;
std::map<u32, StorageImage> images;
+ std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id instance_index{};
Id vertex_index{};
Id base_instance{};
Id base_vertex{};
- std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id frag_depth{};
Id frag_coord{};
Id front_facing{};
@@ -3048,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) {
- entries.texel_buffers.emplace_back(sampler);
+ entries.uniform_texels.emplace_back(sampler);
} else {
entries.samplers.emplace_back(sampler);
}
}
for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
+ if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
+ entries.storage_texels.emplace_back(image);
+ } else {
+ entries.images.emplace_back(image);
+ }
}
for (const auto& attribute : ir.GetInputAttributes()) {
if (IsGenericAttribute(attribute)) {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..2b0e90396 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -21,8 +21,9 @@ class VKDevice;
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using TexelBufferEntry = VideoCommon::Shader::Sampler;
+using UniformTexelEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
+using StorageTexelEntry = VideoCommon::Shader::Image;
using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -66,13 +67,15 @@ private:
struct ShaderEntries {
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
- texel_buffers.size() + samplers.size() + images.size());
+ uniform_texels.size() + samplers.size() + storage_texels.size() +
+ images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
- std::vector<TexelBufferEntry> texel_buffers;
+ std::vector<UniformTexelEntry> uniform_texels;
std::vector<SamplerEntry> samplers;
+ std::vector<StorageTexelEntry> storage_texels;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -88,7 +91,8 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
- std::optional<float> point_size{};
+ std::optional<float> point_size;
+ std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
};
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 94a89e388..e5a583dd5 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -36,6 +36,15 @@ Flags MakeInvalidationFlags() {
flags[BlendConstants] = true;
flags[DepthBounds] = true;
flags[StencilProperties] = true;
+ flags[CullMode] = true;
+ flags[DepthBoundsEnable] = true;
+ flags[DepthTestEnable] = true;
+ flags[DepthWriteEnable] = true;
+ flags[DepthCompareOp] = true;
+ flags[FrontFace] = true;
+ flags[PrimitiveTopology] = true;
+ flags[StencilOp] = true;
+ flags[StencilTestEnable] = true;
return flags;
}
@@ -75,6 +84,57 @@ void SetupDirtyStencilProperties(Tables& tables) {
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
+void SetupDirtyCullMode(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(cull_face)] = CullMode;
+ table[OFF(cull_test_enabled)] = CullMode;
+}
+
+void SetupDirtyDepthBoundsEnable(Tables& tables) {
+ tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
+}
+
+void SetupDirtyDepthTestEnable(Tables& tables) {
+ tables[0][OFF(depth_test_enable)] = DepthTestEnable;
+}
+
+void SetupDirtyDepthWriteEnable(Tables& tables) {
+ tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
+}
+
+void SetupDirtyDepthCompareOp(Tables& tables) {
+ tables[0][OFF(depth_test_func)] = DepthCompareOp;
+}
+
+void SetupDirtyFrontFace(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(front_face)] = FrontFace;
+ table[OFF(screen_y_control)] = FrontFace;
+}
+
+void SetupDirtyPrimitiveTopology(Tables& tables) {
+ tables[0][OFF(draw.topology)] = PrimitiveTopology;
+}
+
+void SetupDirtyStencilOp(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(stencil_front_op_fail)] = StencilOp;
+ table[OFF(stencil_front_op_zfail)] = StencilOp;
+ table[OFF(stencil_front_op_zpass)] = StencilOp;
+ table[OFF(stencil_front_func_func)] = StencilOp;
+ table[OFF(stencil_back_op_fail)] = StencilOp;
+ table[OFF(stencil_back_op_zfail)] = StencilOp;
+ table[OFF(stencil_back_op_zpass)] = StencilOp;
+ table[OFF(stencil_back_func_func)] = StencilOp;
+
+ // Table 0 is used by StencilProperties
+ tables[1][OFF(stencil_two_side_enable)] = StencilOp;
+}
+
+void SetupDirtyStencilTestEnable(Tables& tables) {
+ tables[0][OFF(stencil_enable)] = StencilTestEnable;
+}
+
} // Anonymous namespace
StateTracker::StateTracker(Core::System& system)
@@ -90,6 +150,14 @@ void StateTracker::Initialize() {
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
+ SetupDirtyCullMode(tables);
+ SetupDirtyDepthBoundsEnable(tables);
+ SetupDirtyDepthTestEnable(tables);
+ SetupDirtyDepthWriteEnable(tables);
+ SetupDirtyDepthCompareOp(tables);
+ SetupDirtyFrontFace(tables);
+ SetupDirtyPrimitiveTopology(tables);
+ SetupDirtyStencilOp(tables);
}
void StateTracker::InvalidateCommandBufferState() {
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 03bc415b2..54ca0d6c6 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -26,6 +26,16 @@ enum : u8 {
DepthBounds,
StencilProperties,
+ CullMode,
+ DepthBoundsEnable,
+ DepthTestEnable,
+ DepthWriteEnable,
+ DepthCompareOp,
+ FrontFace,
+ PrimitiveTopology,
+ StencilOp,
+ StencilTestEnable,
+
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
@@ -64,6 +74,46 @@ public:
return Exchange(Dirty::StencilProperties, false);
}
+ bool TouchCullMode() {
+ return Exchange(Dirty::CullMode, false);
+ }
+
+ bool TouchDepthBoundsTestEnable() {
+ return Exchange(Dirty::DepthBoundsEnable, false);
+ }
+
+ bool TouchDepthTestEnable() {
+ return Exchange(Dirty::DepthTestEnable, false);
+ }
+
+ bool TouchDepthBoundsEnable() {
+ return Exchange(Dirty::DepthBoundsEnable, false);
+ }
+
+ bool TouchDepthWriteEnable() {
+ return Exchange(Dirty::DepthWriteEnable, false);
+ }
+
+ bool TouchDepthCompareOp() {
+ return Exchange(Dirty::DepthCompareOp, false);
+ }
+
+ bool TouchFrontFace() {
+ return Exchange(Dirty::FrontFace, false);
+ }
+
+ bool TouchPrimitiveTopology() {
+ return Exchange(Dirty::PrimitiveTopology, false);
+ }
+
+ bool TouchStencilOp() {
+ return Exchange(Dirty::StencilOp, false);
+ }
+
+ bool TouchStencilTestEnable() {
+ return Exchange(Dirty::StencilTestEnable, false);
+ }
+
private:
bool Exchange(std::size_t id, bool new_value) const noexcept {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 868447af2..2d28a6c47 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -121,7 +121,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
// Substract from the preferred heap size some bytes to avoid getting out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
- const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024;
+ const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024;
VkBufferCreateInfo buffer_ci;
buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index dfddf7ad6..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
- VkBuffer GetHandle() const {
+ VkBuffer Handle() const noexcept {
return *buffer;
}
+ u64 Address() const noexcept {
+ return 0;
+ }
+
private:
struct Watch final {
VKFenceWatch fence;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 2f1d5021d..430031665 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
ci.pNext = nullptr;
ci.flags = 0;
ci.size = static_cast<VkDeviceSize>(host_memory_size);
- ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
- VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
ci.queueFamilyIndexCount = 0;
ci.pQueueFamilyIndices = nullptr;
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
ci.extent = {params.width, params.height, 1};
break;
case SurfaceTarget::Texture3D:
+ ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
ci.extent = {params.width, params.height, params.depth};
break;
case SurfaceTarget::TextureBuffer:
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
return ci;
}
+u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+}
+
} // Anonymous namespace
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
}
// TODO(Rodrigo): Move this to a virtual function.
- main_view = CreateViewInner(
- ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
- true);
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+ main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
}
CachedSurface::~CachedSurface() = default;
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
}
View CachedSurface::CreateView(const ViewParams& params) {
- return CreateViewInner(params, false);
-}
-
-View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
// TODO(Rodrigo): Add name decorations
- return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+ return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
}
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
}
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy)
+ const ViewParams& params)
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
- base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
- num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
- : VK_IMAGE_VIEW_TYPE_1D} {}
+ base_level{params.base_level}, num_levels{params.num_levels},
+ image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ base_layer = 0;
+ num_layers = 1;
+ base_slice = params.base_layer;
+ num_slices = params.num_layers;
+ } else {
+ base_layer = params.base_layer;
+ num_layers = params.num_layers;
+ }
+}
CachedSurfaceView::~CachedSurfaceView() = default;
-VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
+VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
+ SwizzleSource z_source, SwizzleSource w_source) {
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
if (last_image_view && last_swizzle == new_swizzle) {
return last_image_view;
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
});
}
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ASSERT(base_slice == 0);
+ ASSERT(num_slices == params.depth);
+ }
+
VkImageViewCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
ci.pNext = nullptr;
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
return last_image_view = *image_view;
}
+VkImageView CachedSurfaceView::GetAttachment() {
+ if (render_target) {
+ return *render_target;
+ }
+
+ VkImageViewCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+ ci.pNext = nullptr;
+ ci.flags = 0;
+ ci.image = surface.GetImageHandle();
+ ci.format = surface.GetImage().GetFormat();
+ ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
+ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
+ ci.subresourceRange.aspectMask = aspect_mask;
+ ci.subresourceRange.baseMipLevel = base_level;
+ ci.subresourceRange.levelCount = num_levels;
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
+ ci.subresourceRange.baseArrayLayer = base_slice;
+ ci.subresourceRange.layerCount = num_slices;
+ } else {
+ ci.viewType = image_view_type;
+ ci.subresourceRange.baseArrayLayer = base_layer;
+ ci.subresourceRange.layerCount = num_layers;
+ }
+ render_target = device.GetLogical().CreateImageView(ci);
+ return *render_target;
+}
+
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f211ccb1e..807e26c8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -91,7 +91,6 @@ protected:
void DecorateSurfaceName();
View CreateView(const ViewParams& params) override;
- View CreateViewInner(const ViewParams& params, bool is_proxy);
private:
void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -120,23 +119,20 @@ private:
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy);
+ const ViewParams& params);
~CachedSurfaceView();
- VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ VkImageView GetAttachment();
bool IsSameSurface(const CachedSurfaceView& rhs) const {
return &surface == &rhs.surface;
}
- VkImageView GetHandle() {
- return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
- Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
- }
-
u32 GetWidth() const {
return params.GetMipWidth(base_level);
}
@@ -180,14 +176,6 @@ public:
}
private:
- static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
- }
-
// Store a copy of these values to avoid double dereference when reading them
const SurfaceParams params;
const VkImage image;
@@ -196,15 +184,18 @@ private:
const VKDevice& device;
CachedSurface& surface;
- const u32 base_layer;
- const u32 num_layers;
const u32 base_level;
const u32 num_levels;
const VkImageViewType image_view_type;
+ u32 base_layer = 0;
+ u32 num_layers = 0;
+ u32 base_slice = 0;
+ u32 num_slices = 0;
VkImageView last_image_view = nullptr;
u32 last_swizzle = 0;
+ vk::ImageView render_target;
std::unordered_map<u32, vk::ImageView> view_cache;
};
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
}
void VKUpdateDescriptorQueue::Acquire() {
- entries.clear();
-}
+ // Minimum number of entries required.
+ // This is the maximum number of entries a single draw call migth use.
+ static constexpr std::size_t MIN_ENTRIES = 0x400;
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
- VkDescriptorSet set) {
- if (payload.size() + entries.size() >= payload.max_size()) {
+ if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
+ upload_start = &*payload.end();
+}
- // TODO(Rodrigo): Rework to write the payload directly
- const auto payload_start = payload.data() + payload.size();
- for (const auto& entry : entries) {
- if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
- payload.push_back(*image);
- } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
- payload.push_back(*buffer);
- } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
- payload.push_back(*texel);
- } else {
- UNREACHABLE();
- }
- }
-
- scheduler.Record(
- [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
- logical->UpdateDescriptorSet(set, update_template, payload_start);
- });
+void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
+ VkDescriptorSet set) {
+ const void* const data = upload_start;
+ const vk::Device* const logical = &device.GetLogical();
+ scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
+ logical->UpdateDescriptorSet(set, update_template, data);
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
class VKDevice;
class VKScheduler;
-class DescriptorUpdateEntry {
-public:
- explicit DescriptorUpdateEntry() {}
-
- DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
+struct DescriptorUpdateEntry {
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
- DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
+ DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
- DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
+ DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
-private:
union {
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkSampler sampler, VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
- entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+ payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- entries.emplace_back(texel_buffer);
+ payload.emplace_back(texel_buffer);
}
- VkImageLayout* GetLastImageLayout() {
- return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout;
+ VkImageLayout* LastImageLayout() {
+ return &payload.back().image.imageLayout;
}
-private:
- using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
+ const VkImageLayout* LastImageLayout() const {
+ return &payload.back().image.imageLayout;
+ }
+private:
const VKDevice& device;
VKScheduler& scheduler;
- boost::container::static_vector<Variant, 0x400> entries;
+ const DescriptorUpdateEntry* upload_start = nullptr;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..051298cc8 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -88,6 +88,16 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetStencilWriteMask);
X(vkCmdSetViewport);
X(vkCmdWaitEvents);
+ X(vkCmdBindVertexBuffers2EXT);
+ X(vkCmdSetCullModeEXT);
+ X(vkCmdSetDepthBoundsTestEnableEXT);
+ X(vkCmdSetDepthCompareOpEXT);
+ X(vkCmdSetDepthTestEnableEXT);
+ X(vkCmdSetDepthWriteEnableEXT);
+ X(vkCmdSetFrontFaceEXT);
+ X(vkCmdSetPrimitiveTopologyEXT);
+ X(vkCmdSetStencilOpEXT);
+ X(vkCmdSetStencilTestEnableEXT);
X(vkCreateBuffer);
X(vkCreateBufferView);
X(vkCreateCommandPool);
@@ -153,7 +163,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
bool Load(InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name)
- return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties);
+ return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
+ X(vkEnumerateInstanceLayerProperties);
#undef X
}
@@ -725,8 +736,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
return supported == VK_TRUE;
}
-VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
- noexcept {
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
VkSurfaceCapabilitiesKHR capabilities;
Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
return capabilities;
@@ -771,4 +781,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
return properties;
}
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld) {
+ u32 num;
+ if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ std::vector<VkLayerProperties> properties(num);
+ if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ return properties;
+}
+
} // namespace Vulkan::vk
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..71daac9d7 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -141,6 +141,7 @@ struct InstanceDispatch {
PFN_vkCreateInstance vkCreateInstance;
PFN_vkDestroyInstance vkDestroyInstance;
PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
+ PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
PFN_vkCreateDevice vkCreateDevice;
@@ -206,6 +207,16 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
PFN_vkCmdSetViewport vkCmdSetViewport;
PFN_vkCmdWaitEvents vkCmdWaitEvents;
+ PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT;
+ PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT;
+ PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT;
+ PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT;
+ PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT;
+ PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT;
+ PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT;
+ PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
+ PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
+ PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
PFN_vkCreateBuffer vkCreateBuffer;
PFN_vkCreateBufferView vkCreateBufferView;
PFN_vkCreateCommandPool vkCreateCommandPool;
@@ -779,7 +790,7 @@ public:
bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
- VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
+ VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
@@ -968,6 +979,50 @@ public:
buffer_barriers.data(), image_barriers.size(), image_barriers.data());
}
+ void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers,
+ const VkDeviceSize* offsets, const VkDeviceSize* sizes,
+ const VkDeviceSize* strides) const noexcept {
+ dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets,
+ sizes, strides);
+ }
+
+ void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept {
+ dld->vkCmdSetCullModeEXT(handle, cull_mode);
+ }
+
+ void SetDepthBoundsTestEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
+ void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept {
+ dld->vkCmdSetDepthCompareOpEXT(handle, compare_op);
+ }
+
+ void SetDepthTestEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
+ void SetDepthWriteEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
+ void SetFrontFaceEXT(VkFrontFace front_face) const noexcept {
+ dld->vkCmdSetFrontFaceEXT(handle, front_face);
+ }
+
+ void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept {
+ dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology);
+ }
+
+ void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op,
+ VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept {
+ dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op);
+ }
+
+ void SetStencilTestEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
const VkDeviceSize* offsets,
const VkDeviceSize* sizes) const noexcept {
@@ -996,4 +1051,7 @@ private:
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld);
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld);
+
} // namespace Vulkan::vk