summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 7
-rw-r--r-- src/video_core/host_shaders/vulkan_quad_indexed.comp 6
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp 23
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp 299
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h 13
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.cpp 12
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.h 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 8
8 files changed, 245 insertions, 125 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 6c8d98946..f1c60d1f3 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -666,9 +666,10 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
BindHostIndexBuffer();
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
- if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
- runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first,
- draw_state.vertex_buffer.count);
+ if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
+ draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
+ runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
+ draw_state.vertex_buffer.count);
}
}
BindHostVertexBuffers();
diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index a412f30ff..066fe4a9c 100644
--- a/src/video_core/host_shaders/vulkan_quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -16,6 +16,7 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
layout (push_constant) uniform PushConstants {
uint base_vertex;
int index_shift; // 0: uint8, 1: uint16, 2: uint32
+ int is_strip; // 0: quads 1: quadstrip
};
void main() {
@@ -28,9 +29,10 @@ void main() {
int flipped_shift = 2 - index_shift;
int mask = (1 << flipped_shift) - 1;
- const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
+ const int quads_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
+ const int quad_strip_swizzle[6] = int[](0, 3, 1, 0, 2, 3);
for (uint vertex = 0; vertex < 6; ++vertex) {
- int offset = primitive * 4 + quad_swizzle[vertex];
+ int offset = (is_strip == 0 ? primitive * 4 + quads_swizzle[vertex] : primitive * 2 + quad_strip_swizzle[vertex]);
int int_offset = offset >> flipped_shift;
int bit_offset = (offset & mask) * index_size;
uint packed_input = input_indexes[int_offset];
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 3e03c5cd6..ca52e2389 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -301,6 +301,8 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
+ case Maxwell::PrimitiveTopology::LineLoop:
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
@@ -309,15 +311,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::Quads:
- // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ // TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
+ // whenever it releases
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
- default:
- UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
- return {};
+ case Maxwell::PrimitiveTopology::Polygon:
+ LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a "
+ "single body and not a bunch of triangles.");
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
}
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
+ return {};
}
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 84d36fea6..6b54d7111 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -51,15 +51,6 @@ size_t BytesPerIndex(VkIndexType index_type) {
}
}
-template <typename T>
-std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
- std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
- for (T& index : indices) {
- index = static_cast<T>(first + index + quad * 4);
- }
- return indices;
-}
-
vk::Buffer CreateBuffer(const Device& device, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
@@ -123,6 +114,187 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat
return *views.back().handle;
}
+/// Lookup table (LUT) of triangle-list indices used to draw non-indexed quad primitives.
+///
+/// The table holds four consecutive copies of the triangulated index list. Copy N has N added
+/// to every index, which lets BindBuffer select the copy matching `first % 4`.
+/// Subclasses decide how many quads a vertex count yields and which six indices form a quad.
+class QuadIndexBuffer {
+public:
+    QuadIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                    Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+          staging_pool{staging_pool_} {}
+
+    virtual ~QuadIndexBuffer() = default;
+
+    /// Grows the table so that it covers at least `num_indices_` source vertices.
+    /// Returns immediately when the current table is already large enough.
+    void UpdateBuffer(u32 num_indices_) {
+        if (num_indices_ <= num_indices) {
+            return;
+        }
+
+        // Drain the scheduler before the previous buffer object is replaced below.
+        scheduler.Finish();
+
+        // Never size the table for fewer than one quad: Vulkan forbids zero-sized buffers and
+        // zero-sized buffer copies. Four vertices produce exactly one quad in both the list and
+        // the strip layout, so this is the smallest capacity that keeps the table non-empty.
+        constexpr u32 MIN_NUM_INDICES = 4;
+        num_indices = num_indices_ < MIN_NUM_INDICES ? MIN_NUM_INDICES : num_indices_;
+        index_type = IndexTypeFromNumElements(device, num_indices);
+
+        const u32 num_quads = GetQuadsNum(num_indices);
+        const u32 num_triangle_indices = num_quads * 6;
+        // One copy of the table per possible value of `first % 4`
+        const u32 num_first_offset_copies = 4;
+        const size_t bytes_per_index = BytesPerIndex(index_type);
+        const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+        buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .size = size_bytes,
+            .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+            .queueFamilyIndexCount = 0,
+            .pQueueFamilyIndices = nullptr,
+        });
+        if (device.HasDebuggingToolAttached()) {
+            buffer.SetObjectNameEXT("Quad LUT");
+        }
+        memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+
+        // Build the four copies in a staging buffer, six indices (one quad) at a time
+        const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+        u8* staging_data = staging.mapped_span.data();
+        const size_t quad_size = bytes_per_index * 6;
+
+        for (u32 first = 0; first < num_first_offset_copies; ++first) {
+            for (u32 quad = 0; quad < num_quads; ++quad) {
+                MakeAndUpdateIndices(staging_data, quad_size, quad, first);
+                staging_data += quad_size;
+            }
+        }
+
+        // Copy the staging data into the table and make the write visible to index fetches
+        scheduler.RequestOutsideRenderPassOperationContext();
+        scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+                          dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+            const VkBufferCopy copy{
+                .srcOffset = src_offset,
+                .dstOffset = 0,
+                .size = size_bytes,
+            };
+            const VkBufferMemoryBarrier write_barrier{
+                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .buffer = dst_buffer,
+                .offset = 0,
+                .size = size_bytes,
+            };
+            cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+        });
+    }
+
+    /// Records a bind of the table as the index buffer, positioned for a draw that starts at
+    /// vertex `first`. UpdateBuffer must have been called with a large enough count beforehand.
+    void BindBuffer(u32 first) {
+        const VkIndexType index_type_ = index_type;
+        // Skip to the copy whose indices were built with an added offset of `first % 4`
+        const size_t sub_first_offset = static_cast<size_t>(first % 4) * GetQuadsNum(num_indices);
+        // NOTE(review): GetQuadsNum(first) doubles as the quad offset inside the copy. That is
+        // first / 4 for quad lists, but (first - 2) / 2 for strips - confirm this is the
+        // intended offset for strips drawn with first >= 4.
+        const size_t offset =
+            (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type);
+        scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindIndexBuffer(buffer, offset, index_type_);
+        });
+    }
+
+protected:
+    /// Returns how many quads `num_indices_` source vertices produce.
+    virtual u32 GetQuadsNum(u32 num_indices_) const = 0;
+
+    /// Writes the six triangle indices of quad number `quad`, each shifted by `first`, to
+    /// `staging_data`. `quad_size` is the byte size of those six indices.
+    virtual void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) = 0;
+
+    const Device& device;
+    MemoryAllocator& memory_allocator;
+    Scheduler& scheduler;
+    StagingBufferPool& staging_pool;
+
+    vk::Buffer buffer{};
+    MemoryCommit memory_commit{};
+    VkIndexType index_type{};
+    // Number of source vertices the current table was built for; 0 until the first UpdateBuffer
+    u32 num_indices = 0;
+};
+
+/// Index table for independent quads: every four consecutive vertices form one quad, which is
+/// split into the two triangles (0, 1, 2) and (0, 2, 3).
+class QuadArrayIndexBuffer : public QuadIndexBuffer {
+public:
+    QuadArrayIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                         Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {}
+
+    ~QuadArrayIndexBuffer() override = default;
+
+private:
+    /// One quad per four vertices; leftover vertices are dropped.
+    u32 GetQuadsNum(u32 num_indices_) const override {
+        return num_indices_ / 4;
+    }
+
+    /// Builds the six triangle-list indices of quad number `quad`, shifted by `first`.
+    /// Values are narrowed to T, the element type matching the table's index type.
+    template <typename T>
+    static std::array<T, 6> MakeIndices(u32 quad, u32 first) {
+        std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
+        for (T& index : indices) {
+            // Consecutive quads are four vertices apart
+            index = static_cast<T>(first + index + quad * 4);
+        }
+        return indices;
+    }
+
+    /// Writes the indices of one quad to `staging_data` using the element width selected by
+    /// the current index type. `quad_size` is the byte size of six indices of that width.
+    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
+        switch (index_type) {
+        case VK_INDEX_TYPE_UINT8_EXT:
+            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT16:
+            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT32:
+            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size);
+            break;
+        default:
+            // No other index type is expected here
+            ASSERT(false);
+            break;
+        }
+    }
+};
+
+/// Index table for quad strips: each quad shares two vertices with the previous one, so every
+/// quad after the first adds two vertices. A quad is split into the two triangles (0, 3, 1)
+/// and (0, 2, 3).
+class QuadStripIndexBuffer : public QuadIndexBuffer {
+public:
+    QuadStripIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                         Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {}
+
+    ~QuadStripIndexBuffer() override = default;
+
+private:
+    /// Fewer than four vertices form no quad; beyond that, each extra pair adds one quad.
+    /// The explicit check also keeps the unsigned subtraction from wrapping around.
+    u32 GetQuadsNum(u32 num_indices_) const override {
+        return num_indices_ >= 4 ? (num_indices_ - 2) / 2 : 0;
+    }
+
+    /// Builds the six triangle-list indices of quad number `quad`, shifted by `first`.
+    /// Values are narrowed to T, the element type matching the table's index type.
+    template <typename T>
+    static std::array<T, 6> MakeIndices(u32 quad, u32 first) {
+        std::array<T, 6> indices{0, 3, 1, 0, 2, 3};
+        for (T& index : indices) {
+            // Consecutive quads of a strip are two vertices apart
+            index = static_cast<T>(first + index + quad * 2);
+        }
+        return indices;
+    }
+
+    /// Writes the indices of one quad to `staging_data` using the element width selected by
+    /// the current index type. `quad_size` is the byte size of six indices of that width.
+    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
+        switch (index_type) {
+        case VK_INDEX_TYPE_UINT8_EXT:
+            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT16:
+            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT32:
+            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size);
+            break;
+        default:
+            // No other index type is expected here
+            ASSERT(false);
+            break;
+        }
+    }
+};
+
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
@@ -130,7 +302,12 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
+ quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {
+ quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
+ scheduler_, staging_pool_);
+ quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
+ scheduler_, staging_pool_);
+}
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Upload);
@@ -245,10 +422,11 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
VkDeviceSize vk_offset = offset;
VkBuffer vk_buffer = buffer;
- if (topology == PrimitiveTopology::Quads) {
+ if (topology == PrimitiveTopology::Quads || topology == PrimitiveTopology::QuadStrip) {
vk_index_type = VK_INDEX_TYPE_UINT32;
std::tie(vk_buffer, vk_offset) =
- quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+ quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset,
+ topology == PrimitiveTopology::QuadStrip);
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
vk_index_type = VK_INDEX_TYPE_UINT16;
std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
@@ -263,7 +441,7 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
});
}
-void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) {
if (count == 0) {
ReserveNullBuffer();
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
@@ -271,16 +449,14 @@ void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
});
return;
}
- ReserveQuadArrayLUT(first + count, true);
-
- // The LUT has the indices 0, 1, 2, and 3 copied as an array
- // To apply these 'first' offsets we can apply an offset based on the modulus.
- const VkIndexType index_type = quad_array_lut_index_type;
- const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
- const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
- scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(buffer, offset, index_type);
- });
+
+ if (topology == PrimitiveTopology::Quads) {
+ quad_array_index_buffer->UpdateBuffer(first + count);
+ quad_array_index_buffer->BindBuffer(first);
+ } else if (topology == PrimitiveTopology::QuadStrip) {
+ quad_strip_index_buffer->UpdateBuffer(first + count);
+ quad_strip_index_buffer->BindBuffer(first);
+ }
}
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
@@ -323,83 +499,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
-void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
- if (num_indices <= current_num_indices) {
- return;
- }
- if (wait_for_idle) {
- scheduler.Finish();
- }
- current_num_indices = num_indices;
- quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
-
- const u32 num_quads = num_indices / 4;
- const u32 num_triangle_indices = num_quads * 6;
- const u32 num_first_offset_copies = 4;
- const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
- const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
- quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = size_bytes,
- .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
- if (device.HasDebuggingToolAttached()) {
- quad_array_lut.SetObjectNameEXT("Quad LUT");
- }
- quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
-
- const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
- u8* staging_data = staging.mapped_span.data();
- const size_t quad_size = bytes_per_index * 6;
- for (u32 first = 0; first < num_first_offset_copies; ++first) {
- for (u32 quad = 0; quad < num_quads; ++quad) {
- switch (quad_array_lut_index_type) {
- case VK_INDEX_TYPE_UINT8_EXT:
- std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
- break;
- case VK_INDEX_TYPE_UINT16:
- std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
- break;
- case VK_INDEX_TYPE_UINT32:
- std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
- break;
- default:
- ASSERT(false);
- break;
- }
- staging_data += quad_size;
- }
- }
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
- dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
- const VkBufferCopy copy{
- .srcOffset = src_offset,
- .dstOffset = 0,
- .size = size_bytes,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = dst_buffer,
- .offset = 0,
- .size = size_bytes,
- };
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
- 0, write_barrier);
- });
-}
-
void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a15c8b39b..183b33632 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -50,6 +50,9 @@ private:
std::vector<BufferView> views;
};
+class QuadArrayIndexBuffer;
+class QuadStripIndexBuffer;
+
class BufferCacheRuntime {
friend Buffer;
@@ -86,7 +89,7 @@ public:
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
- void BindQuadArrayIndexBuffer(u32 first, u32 count);
+ void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count);
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
@@ -118,8 +121,6 @@ private:
update_descriptor_queue.AddBuffer(buffer, offset, size);
}
- void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
-
void ReserveNullBuffer();
const Device& device;
@@ -128,10 +129,8 @@ private:
StagingBufferPool& staging_pool;
UpdateDescriptorQueue& update_descriptor_queue;
- vk::Buffer quad_array_lut;
- MemoryCommit quad_array_lut_commit;
- VkIndexType quad_array_lut_index_type{};
- u32 current_num_indices = 0;
+ std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer;
+ std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
MemoryCommit null_buffer_commit;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 2c00979d7..1a316b6eb 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -245,7 +245,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
UpdateDescriptorQueue& update_descriptor_queue_)
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
- COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
+ COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@@ -253,7 +253,7 @@ QuadIndexedPass::~QuadIndexedPass() = default;
std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
- VkBuffer src_buffer, u32 src_offset) {
+ VkBuffer src_buffer, u32 src_offset, bool is_strip) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -267,7 +267,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
return 2;
}();
const u32 input_size = num_vertices << index_shift;
- const u32 num_tri_vertices = (num_vertices / 4) * 6;
+ const u32 num_tri_vertices = (is_strip ? (num_vertices - 2) / 2 : num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
@@ -278,8 +278,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
- index_shift](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift,
+ is_strip](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -287,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array<u32, 2> push_constants{base_vertex, index_shift};
+ const std::array<u32, 3> push_constants{base_vertex, index_shift, is_strip ? 1u : 0u};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5d32e3caf..c4c8fa081 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -74,7 +74,7 @@ public:
std::pair<VkBuffer, VkDeviceSize> Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
- u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
+ u32 base_vertex, VkBuffer src_buffer, u32 src_offset, bool is_strip);
private:
Scheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4b7126c30..ac1eb9895 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -138,12 +138,16 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
.first_index = is_indexed ? draw_state.index_buffer.first : 0,
.is_indexed = is_indexed,
};
+ // 6 triangle vertices per quad, base vertex is part of the index
+ // See BindQuadIndexBuffer for more details
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
- // 6 triangle vertices per quad, base vertex is part of the index
- // See BindQuadArrayIndexBuffer for more details
params.num_vertices = (params.num_vertices / 4) * 6;
params.base_vertex = 0;
params.is_indexed = true;
+ } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
+ params.num_vertices = (params.num_vertices - 2) / 2 * 6;
+ params.base_vertex = 0;
+ params.is_indexed = true;
}
return params;
}