From c541559767c3912940ee3d73a122530b3edde9f1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 5 Mar 2022 08:01:13 +0100 Subject: MacroHLE: Refactor MacroHLE system. --- src/video_core/buffer_cache/buffer_cache.h | 6 +- src/video_core/dma_pusher.cpp | 9 +- src/video_core/dma_pusher.h | 2 +- src/video_core/engines/draw_manager.cpp | 6 + src/video_core/engines/draw_manager.h | 2 + src/video_core/engines/maxwell_3d.cpp | 37 +++ src/video_core/engines/maxwell_3d.h | 21 +- src/video_core/macro/macro.cpp | 7 +- src/video_core/macro/macro.h | 1 + src/video_core/macro/macro_hle.cpp | 445 ++++++++++++++++++++++------- src/video_core/macro/macro_hle.h | 5 + 11 files changed, 420 insertions(+), 121 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 557227b37..98343628c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1574,7 +1574,11 @@ void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s if (!is_async) { return; } - uncommitted_ranges.add(base_interval); + const bool is_high_accuracy = + Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; + if (is_high_accuracy) { + uncommitted_ranges.add(base_interval); + } } template diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 13ff64fa3..5ad40abaa 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -94,10 +94,10 @@ void DmaPusher::ProcessCommands(std::span commands) { if (dma_state.method_count) { // Data word of methods command + dma_state.dma_word_offset = static_cast(index * sizeof(u32)); if (dma_state.non_incrementing) { const u32 max_write = static_cast( std::min(index + dma_state.method_count, commands.size()) - index); - dma_state.dma_word_offset = static_cast(index * sizeof(u32)); CallMultiMethod(&command_header.argument, max_write); dma_state.method_count -= max_write; dma_state.is_last_call = true; @@ -133,6 +133,8 @@ void DmaPusher::ProcessCommands(std::span commands) { case SubmissionMode::Inline: dma_state.method = command_header.method; dma_state.subchannel = command_header.subchannel; + dma_state.dma_word_offset = static_cast( + -static_cast(dma_state.dma_get)); // negate to set address as 0 CallMethod(command_header.arg_count); dma_state.non_incrementing = true; dma_increment_once = false; @@ -165,8 +167,9 @@ void DmaPusher::CallMethod(u32 argument) const { dma_state.method_count, }); } else { - subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, - dma_state.is_last_call); + auto subchannel = subchannels[dma_state.subchannel]; + subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; + subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index da7728ded..1cdb690ed 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -157,7 +157,7 @@ private: u32 method_count; ///< Current method count u32 length_pending; ///< Large NI command length pending GPUVAddr dma_get; ///< Currently read segment - u32 dma_word_offset; ///< Current word ofset from address + u64 dma_word_offset; ///< Current word ofset from address bool non_incrementing; ///< Current command's NI flag bool is_last_call; }; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index c60f32aad..183d5403c 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -91,6 +91,12 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind ProcessDraw(true, num_instances); } +void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { + draw_state.topology = topology; + + ProcessDrawIndirect(true); +} + void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { const auto& regs{maxwell3d->regs}; draw_state.topology = topology; diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 437990162..49a4fca48 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -56,6 +56,8 @@ public: void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); + void DrawArrayIndirect(PrimitiveTopology topology); + void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); const State& GetDrawState() const { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index cd6274a9b..b998a8e69 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -133,15 +133,52 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool for (size_t i = 0; i < amount; i++) { macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); } + macro_segments.emplace_back(current_dma_segment, amount); // Call the macro when there are no more parameters in the command buffer if (is_last_call) { CallMacroMethod(executing_macro, macro_params); macro_params.clear(); macro_addresses.clear(); + macro_segments.clear(); } } +void Maxwell3D::RefreshParameters() { + size_t current_index = 0; + for (auto& segment : macro_segments) { + if (segment.first == 0) { + current_index += segment.second; + continue; + } + memory_manager.ReadBlock(segment.first, ¯o_params[current_index], + sizeof(u32) * segment.second); + current_index += segment.second; + } +} + +u32 Maxwell3D::GetMaxCurrentVertices() { + u32 num_vertices = 0; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + const auto& array = regs.vertex_streams[index]; + if (array.enable == 0) { + continue; + } + const auto& attribute = regs.vertex_attrib_format[index]; + if (attribute.constant) { + num_vertices = std::max(num_vertices, 1U); + continue; + } + const auto& limit = regs.vertex_stream_limits[index]; + const GPUVAddr gpu_addr_begin = array.Address(); + const GPUVAddr gpu_addr_end = limit.Address() + 1; + const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin); + num_vertices = std::max( + num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + } + return num_vertices; +} + u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Keep track of the register value in shadow_state when requested. const auto control = shadow_state.shadow_ram_control; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ac5e87563..e2256594d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3068,10 +3068,14 @@ public: friend class DrawManager; std::vector inline_index_draw_indexes; - std::vector macro_addresses; - Core::System& system; - MemoryManager& memory_manager; + GPUVAddr getMacroAddress(size_t index) const { + return macro_addresses[index]; + } + + void RefreshParameters(); + + u32 GetMaxCurrentVertices(); /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(u32 layer_count); @@ -3135,6 +3139,9 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + Core::System& system; + MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; /// Start offsets of each macro in macro_memory @@ -3151,6 +3158,14 @@ private: Upload::State upload_state; bool execute_on{true}; + + std::array draw_command{}; + std::vector deferred_draw_method; + enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; + DrawMode draw_mode{DrawMode::General}; + bool draw_indexed{}; + std::vector> macro_segments; + std::vector macro_addresses; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 505d81c1e..01dd25f95 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -13,6 +13,7 @@ #include "common/fs/fs.h" #include "common/fs/path_util.h" #include "common/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" @@ -40,8 +41,8 @@ static void Dump(u64 hash, std::span code) { macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); } -MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) - : hle_macros{std::make_unique(maxwell3d)} {} +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_) + : hle_macros{std::make_unique(maxwell3d_)}, maxwell3d{maxwell3d_} {} MacroEngine::~MacroEngine() = default; @@ -61,6 +62,7 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { if (cache_info.has_hle_program) { cache_info.hle_program->Execute(parameters, method); } else { + maxwell3d.RefreshParameters(); cache_info.lle_program->Execute(parameters, method); } } else { @@ -106,6 +108,7 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { cache_info.hle_program = std::move(hle_program); cache_info.hle_program->Execute(parameters, method); } else { + maxwell3d.RefreshParameters(); cache_info.lle_program->Execute(parameters, method); } } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 07d97ba39..737ced9a4 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -137,6 +137,7 @@ private: std::unordered_map macro_cache; std::unordered_map> uploaded_macro_code; std::unique_ptr hle_macros; + Engines::Maxwell3D& maxwell3d; }; std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index da988cc0d..79fab96e1 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -3,6 +3,7 @@ #include #include +#include "common/assert.h" #include "common/scope_exit.h" #include "video_core/dirty_flags.h" #include "video_core/engines/draw_manager.h" @@ -15,143 +16,365 @@ namespace Tegra { namespace { -using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector& parameters); - -// HLE'd functions -void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); - maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0] & 0x3ffffff), - parameters[4], parameters[1], parameters[3], parameters[5], instance_count); +bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { + switch (topology) { + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: + return true; + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: + default: + return false; + } } -void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - maxwell3d.draw_manager->DrawArray( - static_cast(parameters[0]), - parameters[3], parameters[1], parameters[4], instance_count); -} +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} -void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, element_base, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); - - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_indexed = true; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.macro_addresses[1]; - params.buffer_size = 5 * sizeof(u32); - params.max_draw_counts = 1; - params.stride = 0; - - maxwell3d.draw_manager->DrawIndexedIndirect( - static_cast(parameters[0]), 0, - 1U << 18); - - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0x0, true); - maxwell3d.CallMethod(0x8e5, 0x0, true); -} +protected: + void advanceCheck() { + current_value = (current_value + 1) % fibonacci_post; + check_limit = current_value == 0; + if (check_limit) { + const u32 new_fibonacci = fibonacci_pre + fibonacci_post; + fibonacci_pre = fibonacci_post; + fibonacci_post = new_fibonacci; + } + } + + Engines::Maxwell3D& maxwell3d; + u32 fibonacci_pre{89}; + u32 fibonacci_post{144}; + u32 current_value{fibonacci_post - 1}; + bool check_limit{}; +}; + +class HLE_771BB18C62444DA0 final : public HLEMacroImpl { +public: + explicit HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} -// Multidraw Indixed Indirect -void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, - const std::vector& parameters) { - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[0] & + 0x3ffffff), + parameters[4], parameters[1], parameters[3], parameters[5], instance_count); } - const auto topology = - static_cast(parameters[2]); - const u32 padding = parameters[3]; // padding is in words - - // size of each indirect segment - const u32 indirect_words = 5 + padding; - const u32 stride = indirect_words * sizeof(u32); - const std::size_t draw_count = end_indirect - start_indirect; - u32 lowest_first = std::numeric_limits::max(); - u32 highest_limit = std::numeric_limits::min(); - for (std::size_t index = 0; index < draw_count; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 count = parameters[base]; - const u32 first_index = parameters[base + 2]; - lowest_first = std::min(lowest_first, first_index); - highest_limit = std::max(highest_limit, first_index + count); +}; + +class HLE_DrawArraysIndirect final : public HLEMacroImpl { +public: + explicit HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d_, bool extended_ = false) + : HLEMacroImpl(maxwell3d_), extended(extended_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = + static_cast(parameters[0]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.buffer_size = 4 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, parameters[4], true); + } + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0, true); + } + maxwell3d.regs.vertex_buffer.first = 0; + maxwell3d.regs.vertex_buffer.count = 0; } - const u32 base_vertex = parameters[8]; - const u32 base_instance = parameters[9]; - maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, base_vertex, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_indexed = true; - params.include_count = true; - params.count_start_address = maxwell3d.macro_addresses[4]; - params.indirect_start_address = maxwell3d.macro_addresses[5]; - params.buffer_size = stride * draw_count; - params.max_draw_counts = draw_count; - params.stride = stride; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); -} +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT({ + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0, true); + } + }); + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); -// Multi-layer Clear -void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - ASSERT(parameters.size() == 1); + const u32 vertex_first = parameters[3]; + const u32 vertex_count = parameters[1]; - const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; - const u32 rt_index = clear_params.RT; - const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; - ASSERT(clear_params.layer == 0); + if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { + ASSERT_MSG(false, "Faulty draw!"); + return; + } - maxwell3d.regs.clear_surface.raw = clear_params.raw; - maxwell3d.draw_manager->Clear(num_layers); -} + const u32 base_instance = parameters[4]; + if (extended) { + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_instance, true); + } + + maxwell3d.draw_manager->DrawArray( + static_cast(parameters[0]), + vertex_first, vertex_count, base_instance, instance_count); + } -constexpr std::array, 5> hle_funcs{{ - {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, - {0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect}, - {0x0217920100488FF7, &HLE_DrawIndexedIndirect}, - {0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect}, - {0xEAD26C3E2109B06B, &HLE_MultiLayerClear}, -}}; + bool extended; +}; -class HLEMacroImpl final : public CachedMacro { +class HLE_DrawIndexedIndirect final : public HLEMacroImpl { public: - explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) - : maxwell3d{maxwell3d_}, func{func_} {} + explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - void Execute(const std::vector& parameters, u32 method) override { - func(maxwell3d, parameters); + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = + static_cast(parameters[0]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + advanceCheck(); + if (check_limit) { + maxwell3d.RefreshParameters(); + minimum_limit = std::max(parameters[3], minimum_limit); + } + + const u32 base_vertex = parameters[8]; + const u32 base_instance = parameters[9]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = true; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.buffer_size = 5 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, minimum_limit); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); } private: - Engines::Maxwell3D& maxwell3d; - HLEFunction func; + void Fallback(const std::vector& parameters) { + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, element_base, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); + + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[0]), + parameters[3], parameters[1], element_base, base_instance, instance_count); + + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); + } + + u32 minimum_limit{1 << 18}; +}; + +class HLE_MultiLayerClear final : public HLEMacroImpl { +public: + explicit HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + ASSERT(parameters.size() == 1); + + const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; + const u32 rt_index = clear_params.RT; + const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; + ASSERT(clear_params.layer == 0); + + maxwell3d.regs.clear_surface.raw = clear_params.raw; + maxwell3d.draw_manager->Clear(num_layers); + } +}; + +class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { +public: + explicit HLE_MultiDrawIndexedIndirectCount(Engines::Maxwell3D& maxwell3d_) + : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + const auto topology = + static_cast(parameters[2]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + advanceCheck(); + if (check_limit) { + maxwell3d.RefreshParameters(); + } + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + + maxwell3d.regs.draw.topology.Assign(topology); + const u32 padding = parameters[3]; // padding is in words + + // size of each indirect segment + const u32 indirect_words = 5 + padding; + const u32 stride = indirect_words * sizeof(u32); + const std::size_t draw_count = end_indirect - start_indirect; + u32 lowest_first = std::numeric_limits::max(); + u32 highest_limit = std::numeric_limits::min(); + for (std::size_t index = 0; index < draw_count; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 count = parameters[base]; + const u32 first_index = parameters[base + 2]; + lowest_first = std::min(lowest_first, first_index); + highest_limit = std::max(highest_limit, first_index + count); + } + if (check_limit) { + minimum_limit = std::max(highest_limit, minimum_limit); + } + + maxwell3d.regs.index_buffer.first = 0; + maxwell3d.regs.index_buffer.count = std::max(highest_limit, minimum_limit); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_indexed = true; + params.include_count = true; + params.count_start_address = maxwell3d.getMacroAddress(4); + params.indirect_start_address = maxwell3d.getMacroAddress(5); + params.buffer_size = stride * draw_count; + params.max_draw_counts = draw_count; + params.stride = stride; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); + } + +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT({ + // Clean everything. + // Clean everything. + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); + }); + maxwell3d.RefreshParameters(); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + const auto topology = + static_cast(parameters[2]); + maxwell3d.regs.draw.topology.Assign(topology); + const u32 padding = parameters[3]; + const std::size_t max_draws = parameters[4]; + + const u32 indirect_words = 5 + padding; + const std::size_t first_draw = start_indirect; + const std::size_t effective_draws = end_indirect - start_indirect; + const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws); + + for (std::size_t index = first_draw; index < last_draw; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 base_vertex = parameters[base + 3]; + const u32 base_instance = parameters[base + 4]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], + base_vertex, base_instance, parameters[base + 1]); + } + } + + u32 minimum_limit{1 << 12}; }; } // Anonymous namespace -HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { + builders.emplace(0x771BB18C62444DA0ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0x0D61FC9FAAC9FCADULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0x8A4D173EB99A8603ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d, true); + })); + builders.emplace(0x0217920100488FF7ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0x3F5E74B9C9A50164ULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); + builders.emplace(0xEAD26C3E2109B06BULL, + std::function(Engines::Maxwell3D&)>( + [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { + return std::make_unique(maxwell3d); + })); +} + HLEMacro::~HLEMacro() = default; std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { - const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), - [hash](const auto& pair) { return pair.first == hash; }); - if (it == hle_funcs.end()) { + const auto it = builders.find(hash); + if (it == builders.end()) { return nullptr; } - return std::make_unique(maxwell3d, it->second); + return it->second(maxwell3d); } } // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index 625332c9d..33f92fab1 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -3,7 +3,10 @@ #pragma once +#include #include +#include + #include "common/common_types.h" namespace Tegra { @@ -23,6 +26,8 @@ public: private: Engines::Maxwell3D& maxwell3d; + std::unordered_map(Engines::Maxwell3D&)>> + builders; }; } // namespace Tegra -- cgit v1.2.3