diff options
Diffstat (limited to 'src/video_core/macro')
-rw-r--r-- | src/video_core/macro/macro.cpp | 35 | ||||
-rw-r--r-- | src/video_core/macro/macro.h | 19 | ||||
-rw-r--r-- | src/video_core/macro/macro_hle.cpp | 113 | ||||
-rw-r--r-- | src/video_core/macro/macro_hle.h | 44 | ||||
-rw-r--r-- | src/video_core/macro/macro_interpreter.cpp | 3 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 65 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 1 |
7 files changed, 245 insertions, 35 deletions
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 89077a2d8..ef7dad349 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,23 +2,37 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <boost/container_hash/hash.hpp> #include "common/assert.h" #include "common/logging/log.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" +#include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" #include "video_core/macro/macro_jit_x64.h" namespace Tegra { +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) + : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} + +MacroEngine::~MacroEngine() = default; + void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, + const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { - compiled_macro->second->Execute(parameters, method); + const auto& cache_info = compiled_macro->second; + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } else { // Macro not compiled, check if it's uploaded and if so, compile it auto macro_code = uploaded_macro_code.find(method); @@ -26,8 +40,21 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); return; } - macro_cache[method] = Compile(macro_code->second); - macro_cache[method]->Execute(parameters, method); + auto& cache_info = macro_cache[method]; + cache_info.hash = boost::hash_value(macro_code->second); + cache_info.lle_program = Compile(macro_code->second); + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (hle_program.has_value()) { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program.value()); + } + + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index b76ed891f..4d00b84b0 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -11,9 +11,11 @@ #include "common/common_types.h" namespace Tegra { + namespace Engines { class Maxwell3D; } + namespace Macro { constexpr std::size_t NUM_MACRO_REGISTERS = 8; enum class Operation : u32 { @@ -94,6 +96,8 @@ union MethodAddress { } // namespace Macro +class HLEMacro; + class CachedMacro { public: virtual ~CachedMacro() = default; @@ -107,20 +111,29 @@ public: class MacroEngine { public: - virtual ~MacroEngine() = default; + explicit MacroEngine(Engines::Maxwell3D& maxwell3d); + virtual ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, const std::vector<u32>& parameters); + void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); protected: virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; private: - std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache; + struct CacheInfo { + std::unique_ptr<CachedMacro> lle_program{}; + std::unique_ptr<CachedMacro> hle_program{}; + u64 hash{}; + bool has_hle_program{}; + }; + + std::unordered_map<u32, CacheInfo> macro_cache; std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; + std::unique_ptr<HLEMacro> hle_macros; }; std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..410f99018 --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra { + +namespace { +// HLE'd functions +static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & + ~(0x3ffffff << 26))); + maxwell3d.regs.vb_base_instance = parameters[5]; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.regs.vb_element_base = parameters[3]; + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.index_array.first = parameters[4]; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.index_array.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + maxwell3d.regs.vertex_buffer.first = parameters[3]; + maxwell3d.regs.vertex_buffer.count = parameters[1]; + maxwell3d.regs.vb_base_instance = parameters[4]; + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + maxwell3d.mme_draw.instance_count = count; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(false, true); + } + maxwell3d.regs.vertex_buffer.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.index_array.first = parameters[3]; + maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.vb_element_base = element_base; + maxwell3d.regs.vb_base_instance = base_instance; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, element_base); + maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? + maxwell3d.regs.index_array.count = 0; + maxwell3d.regs.vb_element_base = 0x0; + maxwell3d.regs.vb_base_instance = 0x0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, 0x0); + maxwell3d.CallMethodFromMME(0x8e5, 0x0); + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} +} // namespace + +constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ + std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), + std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), + std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), +}}; + +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::~HLEMacro() = default; + +std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { + const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), + [hash](const auto& pair) { return pair.first == hash; }); + if (it == hle_funcs.end()) { + return std::nullopt; + } + return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); +} + +HLEMacroImpl::~HLEMacroImpl() = default; + +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) + : maxwell3d(maxwell3d), func(func) {} + +void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { + func(maxwell3d, parameters); +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..37af875a0 --- /dev/null +++ b/src/video_core/macro/macro_hle.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> +#include <vector> +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + +class HLEMacro { +public: + explicit HLEMacro(Engines::Maxwell3D& maxwell3d); + ~HLEMacro(); + + std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); + ~HLEMacroImpl(); + + void Execute(const std::vector<u32>& parameters, u32 method) override; + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 5edff27aa..aa5256419 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,7 +11,8 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index d4a97ec7b..07292702f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { return std::make_unique<MacroJITx64Impl>(maxwell3d, code); @@ -54,13 +55,15 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_a_zero = opcode.src_a == 0; const bool is_b_zero = opcode.src_b == 0; const bool valid_operation = !is_a_zero && !is_b_zero; - const bool is_move_operation = !is_a_zero && is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; - if (!optimizer.zero_reg_skip) { + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { src_a = Compile_GetRegister(opcode.src_a, RESULT); src_b = Compile_GetRegister(opcode.src_b, eax); } else { @@ -71,7 +74,6 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { src_b = Compile_GetRegister(opcode.src_b, eax); } } - Xbyak::Label skip_carry{}; bool has_emitted = false; @@ -183,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { if (next_opcode.has_value()) { const auto next = *next_opcode; - if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { return; } } @@ -237,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); - shr(src, al); + shr(src, dst.cvt8()); if (opcode.bf_size != 0 && opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } else if (opcode.bf_size == 0) { @@ -256,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); if (opcode.bf_src_bit != 0) { shr(src, opcode.bf_src_bit); @@ -266,16 +269,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { if (opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } - shl(src, al); - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} + shl(src, dst.cvt8()); -static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { - return maxwell3d->GetRegisterValue(method); -} - -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { @@ -295,15 +291,27 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, qword[STATE]); - mov(Common::X64::ABI_PARAM2, RESULT); - Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(RESULT, Common::X64::ABI_RETURN.cvt32()); + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); @@ -435,6 +443,9 @@ void MacroJITx64Impl::Compile() { // one if our register isn't "dirty" optimizer.optimize_for_method_move = true; + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + // Check to see if we can skip emitting certain instructions Optimizer_ScanFlags(); @@ -543,7 +554,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { } void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { + const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // register. if (reg == 0) { @@ -551,7 +562,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 } mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; - auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 51ec090b8..a180e7428 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -76,6 +76,7 @@ private: bool zero_reg_skip{}; bool skip_dummy_addimmediate{}; bool optimize_for_method_move{}; + bool enable_asserts{}; }; OptimizerState optimizer{}; |