diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
-rw-r--r-- | src/video_core/gpu.h | 16 | ||||
-rw-r--r-- | src/video_core/macro/macro.cpp | 13 | ||||
-rw-r--r-- | src/video_core/macro/macro.h | 2 | ||||
-rw-r--r-- | src/video_core/macro/macro_hle.cpp | 34 | ||||
-rw-r--r-- | src/video_core/macro/macro_hle.h | 21 | ||||
-rw-r--r-- | src/video_core/macro/macro_interpreter.cpp | 92 | ||||
-rw-r--r-- | src/video_core/macro/macro_interpreter.h | 78 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 104 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 71 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_fsr.cpp | 2 |
12 files changed, 207 insertions, 230 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 43bed63ac..048dba4f3 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1474,6 +1474,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu // When this memory region has been joined a bunch of times, we assume it's being used // as a stream buffer. Increase the size to skip constantly recreating buffers. has_stream_leap = true; + begin -= PAGE_SIZE * 256; + cpu_addr = begin; end += PAGE_SIZE * 256; } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b18b8a02a..c38ebd670 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. - macro_engine->Execute(*this, macro_positions[entry], parameters); + macro_engine->Execute(macro_positions[entry], parameters); if (mme_draw.current_mode != MMEDrawMode::Undefined) { FlushMMEInlineDraw(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 3188b83ed..26b8ea233 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -12,9 +12,6 @@ #include "video_core/framebuffer_config.h" namespace Core { -namespace Frontend { -class EmuWindow; -} class System; } // namespace Core @@ -25,7 +22,6 @@ class ShaderNotify; namespace Tegra { class DmaPusher; -class CDmaPusher; struct CommandList; enum class RenderTargetFormat : u32 { @@ -88,15 +84,9 @@ enum class DepthFormat : u32 { D32_FLOAT_S8X24_UINT = 0x19, }; -struct CommandListHeader; -class DebugContext; - namespace Engines { -class Fermi2D; class Maxwell3D; -class MaxwellDMA; class KeplerCompute; -class KeplerMemory; } // namespace Engines enum class EngineID { @@ -190,12 +180,6 @@ public: /// Returns a const reference to the GPU DMA pusher. [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const; - /// Returns a reference to the GPU CDMA pusher. - [[nodiscard]] Tegra::CDmaPusher& CDmaPusher(); - - /// Returns a const reference to the GPU CDMA pusher. - [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; - /// Returns a reference to the underlying renderer. [[nodiscard]] VideoCore::RendererBase& Renderer(); diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index d7fabe605..0aeda4ce8 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,12 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> #include <optional> + #include <boost/container_hash/hash.hpp> + #include "common/assert.h" -#include "common/logging/log.h" #include "common/settings.h" -#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" @@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, - const std::vector<u32>& parameters) { +void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { const auto& cache_info = compiled_macro->second; @@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, cache_info.lle_program = Compile(code); } - auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); - if (hle_program.has_value()) { + if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { cache_info.has_hle_program = true; - cache_info.hle_program = std::move(hle_program.value()); + cache_info.hle_program = std::move(hle_program); cache_info.hle_program->Execute(parameters, method); } else { cache_info.lle_program->Execute(parameters, method); diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 31ee3440a..7aaa49286 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -119,7 +119,7 @@ public: void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); + void Execute(u32 method, const std::vector<u32>& parameters); protected: virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 70ac7c620..900ad23c9 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -5,12 +5,15 @@ #include <array> #include <vector> #include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/rasterizer_interface.h" namespace Tegra { - namespace { + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + // HLE'd functions void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); @@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.CallMethodFromMME(0x8e5, 0x0); maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -} // Anonymous namespace constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, @@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; +class HLEMacroImpl final : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) + : maxwell3d{maxwell3d_}, func{func_} {} + + void Execute(const std::vector<u32>& parameters, u32 method) override { + func(maxwell3d, parameters); + } + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; +} // Anonymous namespace + HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} HLEMacro::~HLEMacro() = default; -std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { +std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const { const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), [hash](const auto& pair) { return pair.first == hash; }); if (it == hle_funcs.end()) { - return std::nullopt; + return nullptr; } return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); } -HLEMacroImpl::~HLEMacroImpl() = default; - -HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) - : maxwell3d{maxwell3d_}, func{func_} {} - -void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { - func(maxwell3d, parameters); -} - } // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index cb3bd1600..b86ba84a1 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -5,10 +5,7 @@ #pragma once #include <memory> -#include <optional> -#include <vector> #include "common/common_types.h" -#include "video_core/macro/macro.h" namespace Tegra { @@ -16,29 +13,17 @@ namespace Engines { class Maxwell3D; } -using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); - class HLEMacro { public: explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); ~HLEMacro(); - std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -class HLEMacroImpl : public CachedMacro { -public: - explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); - ~HLEMacroImpl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; + // Allocates and returns a cached macro if the hash matches a known function. + // Returns nullptr otherwise. + [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; private: Engines::Maxwell3D& maxwell3d; - HLEFunction func; }; } // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 8da26fd59..fba755448 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -2,6 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <optional> + #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -11,16 +14,81 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +namespace { +class MacroInterpreterImpl final : public CachedMacro { +public: + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : maxwell3d{maxwell3d_}, code{code_} {} -std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); -} + void Execute(const std::vector<u32>& params, u32 method) override; + +private: + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Macro::Opcode GetOpcode() const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); -MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, - const std::vector<u32>& code_) - : maxwell3d{maxwell3d_}, code{code_} {} + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + /// Current program counter + u32 pc{}; + /// Program counter to execute at after the delay slot is executed. + std::optional<u32> delayed_pc; + + /// General purpose macro registers. + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; + + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::unique_ptr<u32[]> parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; + + bool carry_flag = false; + const std::vector<u32>& code; +}; void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { MICROPROFILE_SCOPE(MacroInterp); @@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() { ASSERT(next_parameter_index < num_parameters); return parameters[next_parameter_index++]; } +} // Anonymous namespace + +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} + +std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index d50c619ce..8a9648e46 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -3,10 +3,9 @@ // Refer to the license.txt file included. #pragma once -#include <array> -#include <optional> + #include <vector> -#include "common/bit_field.h" + #include "common/common_types.h" #include "video_core/macro/macro.h" @@ -26,77 +25,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroInterpreterImpl : public CachedMacro { -public: - explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - void Execute(const std::vector<u32>& params, u32 method) override; - -private: - /// Resets the execution engine state, zeroing registers, etc. - void Reset(); - - /** - * Executes a single macro instruction located at the current program counter. Returns whether - * the interpreter should keep running. - * - * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. - */ - bool Step(bool is_delay_slot); - - /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); - - /// Performs the result operation on the input result and stores it in the specified register - /// (if necessary). - void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); - - /// Evaluates the branch condition and returns whether the branch should be taken or not. - bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; - - /// Reads an opcode at the current program counter location. - Macro::Opcode GetOpcode() const; - - /// Returns the specified register's value. Register 0 is hardcoded to always return 0. - u32 GetRegister(u32 register_id) const; - - /// Sets the register to the input value. - void SetRegister(u32 register_id, u32 value); - - /// Sets the method address to use for the next Send instruction. - void SetMethodAddress(u32 address); - - /// Calls a GPU Engine method with the input parameter. - void Send(u32 value); - - /// Reads a GPU register located at the method address. - u32 Read(u32 method) const; - - /// Returns the next parameter in the parameter queue. - u32 FetchParameter(); - - Engines::Maxwell3D& maxwell3d; - - /// Current program counter - u32 pc; - /// Program counter to execute at after the delay slot is executed. - std::optional<u32> delayed_pc; - - /// General purpose macro registers. - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; - - /// Method address to use for the next Send instruction. - Macro::MethodAddress method_address = {}; - - /// Input parameters of the current macro. - std::unique_ptr<u32[]> parameters; - std::size_t num_parameters = 0; - std::size_t parameters_capacity = 0; - /// Index of the next parameter that will be fetched by the 'parm' instruction. - u32 next_parameter_index = 0; - - bool carry_flag = false; - const std::vector<u32>& code; -}; - } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c6b2b2109..47b28ad16 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -2,9 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <bitset> +#include <optional> + +#include <xbyak/xbyak.h> + #include "common/assert.h" +#include "common/bit_field.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/x64/xbyak_abi.h" #include "common/x64/xbyak_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro_interpreter.h" @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { +namespace { constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; -static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ +constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, @@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +// Arbitrarily chosen based on current booting games. +constexpr size_t MAX_CODE_SIZE = 0x10000; -std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +std::bitset<32> PersistentCallerSavedRegs() { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; } -MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) - : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { - Compile(); -} +class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { +public: + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { + Compile(); + } + + void Execute(const std::vector<u32>& parameters, u32 method) override; + + void Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); -MacroJITx64Impl::~MacroJITx64Impl() = default; + Macro::Opcode GetOpCode() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional<Macro::Opcode> next_opcode{}; + ProgramType program{nullptr}; + + std::array<Xbyak::Label, MAX_CODE_SIZE> labels; + std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + + const std::vector<u32>& code; + Engines::Maxwell3D& maxwell3d; +}; void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { MICROPROFILE_SCOPE(MacroJitExecute); @@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { +void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { maxwell3d->CallMethodFromMME(method_address.address, value); } -void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { +void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); @@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { L(dont_process); } -void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { +void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); const s32 jump_address = static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); @@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { L(end); } -void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { +void MacroJITx64Impl::Optimizer_ScanFlags() { optimizer.can_skip_carry = true; optimizer.has_delayed_pc = false; for (auto raw_op : code) { @@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { return true; } -Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { +Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { mov(eax, dword[PARAMETERS]); add(PARAMETERS, sizeof(u32)); return eax; @@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { ASSERT(pc < code.size()); return {code[pc]}; } +} // Anonymous namespace -std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { - return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; -} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index d03d480b4..773b037ae 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -4,12 +4,7 @@ #pragma once -#include <array> -#include <bitset> -#include <xbyak/xbyak.h> -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/x64/xbyak_abi.h" #include "video_core/macro/macro.h" namespace Tegra { @@ -18,9 +13,6 @@ namespace Engines { class Maxwell3D; } -/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games -constexpr size_t MAX_CODE_SIZE = 0x10000; - class MacroJITx64 final : public MacroEngine { public: explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); @@ -32,67 +24,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - ~MacroJITx64Impl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; - - void Compile_ALU(Macro::Opcode opcode); - void Compile_AddImmediate(Macro::Opcode opcode); - void Compile_ExtractInsert(Macro::Opcode opcode); - void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); - void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); - void Compile_Read(Macro::Opcode opcode); - void Compile_Branch(Macro::Opcode opcode); - -private: - void Optimizer_ScanFlags(); - - void Compile(); - bool Compile_NextInstruction(); - - Xbyak::Reg32 Compile_FetchParameter(); - Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); - void Compile_Send(Xbyak::Reg32 value); - - Macro::Opcode GetOpCode() const; - std::bitset<32> PersistentCallerSavedRegs() const; - - struct JITState { - Engines::Maxwell3D* maxwell3d{}; - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; - u32 carry_flag{}; - }; - static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*); - - struct OptimizerState { - bool can_skip_carry{}; - bool has_delayed_pc{}; - bool zero_reg_skip{}; - bool skip_dummy_addimmediate{}; - bool optimize_for_method_move{}; - bool enable_asserts{}; - }; - OptimizerState optimizer{}; - - std::optional<Macro::Opcode> next_opcode{}; - ProgramType program{nullptr}; - - std::array<Xbyak::Label, MAX_CODE_SIZE> labels; - std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; - Xbyak::Label end_of_code{}; - - bool is_delay_slot{}; - u32 pc{}; - std::optional<u32> delayed_pc; - - const std::vector<u32>& code; - Engines::Maxwell3D& maxwell3d; -}; - } // namespace Tegra diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 73629d229..b630090e8 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -214,7 +214,7 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im { VkImageMemoryBarrier fsr_write_barrier = base_barrier; - fsr_write_barrier.image = *images[image_index], + fsr_write_barrier.image = *images[image_index]; fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |