diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/common/settings.h | 1 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 9 | ||||
-rw-r--r-- | src/video_core/macro/macro.cpp | 32 | ||||
-rw-r--r-- | src/video_core/macro/macro.h | 3 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 24 | ||||
-rw-r--r-- | src/yuzu/configuration/configure_debug.cpp | 3 | ||||
-rw-r--r-- | src/yuzu/configuration/configure_debug.ui | 13 |
8 files changed, 81 insertions, 6 deletions
diff --git a/src/common/settings.h b/src/common/settings.h index 5b34169a8..e61d9cd7f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -606,6 +606,7 @@ struct Values { BasicSetting<bool> dump_exefs{false, "dump_exefs"}; BasicSetting<bool> dump_nso{false, "dump_nso"}; BasicSetting<bool> dump_shaders{false, "dump_shaders"}; + BasicSetting<bool> dump_macros{false, "dump_macros"}; BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"}; BasicSetting<bool> reporting_services{false, "reporting_services"}; BasicSetting<bool> quest_flag{false, "quest_flag"}; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index d4652b167..7d0cb8fce 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -173,6 +173,8 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume case MAXWELL3D_REG_INDEX(shadow_ram_control): shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument); return; + case MAXWELL3D_REG_INDEX(macros.upload_address): + return macro_engine->ClearCode(regs.macros.upload_address); case MAXWELL3D_REG_INDEX(macros.data): return macro_engine->AddCode(regs.macros.upload_address, argument); case MAXWELL3D_REG_INDEX(macros.bind): diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 76e8bc656..a7302f7c1 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -134,7 +134,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Deswizzle the input and copy it over. UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); - const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; + const u32 bytes_per_pixel = + regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; const Parameters& src_params = regs.src_params; const u32 width = src_params.width; const u32 height = src_params.height; @@ -166,7 +167,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); const auto& dst_params = regs.dst_params; - const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; + const u32 bytes_per_pixel = + regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1; const u32 width = dst_params.width; const u32 height = dst_params.height; const u32 depth = dst_params.depth; @@ -210,7 +212,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { } void MaxwellDMA::FastCopyBlockLinearToPitch() { - const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; + const u32 bytes_per_pixel = + regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; const size_t src_size = GOB_SIZE; const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; u32 pos_x = regs.src_params.origin.x; diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index a033d03be..e7279efcd 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,11 +2,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <cstring> +#include <fstream> #include <optional> +#include <span> #include <boost/container_hash/hash.hpp> #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/settings.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" @@ -15,6 +19,23 @@ namespace Tegra { +static void Dump(u64 hash, std::span<const u32> code) { + const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; + const auto macro_dir{base_dir / "macros"}; + if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories"); + return; + } + const auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; + std::fstream macro_file(name, std::ios::out | std::ios::binary); + if (!macro_file) { + LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}", + Common::FS::PathToUTF8String(name)); + return; + } + macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes()); +} + MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} @@ -24,6 +45,11 @@ void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } +void MacroEngine::ClearCode(u32 method) { + macro_cache.erase(method); + uploaded_macro_code.erase(method); +} + void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { @@ -54,6 +80,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { if (!mid_method.has_value()) { cache_info.lle_program = Compile(macro_code->second); cache_info.hash = boost::hash_value(macro_code->second); + if (Settings::values.dump_macros) { + Dump(cache_info.hash, macro_code->second); + } } else { const auto& macro_cached = uploaded_macro_code[mid_method.value()]; const auto rebased_method = method - mid_method.value(); @@ -63,6 +92,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { code.size() * sizeof(u32)); cache_info.hash = boost::hash_value(code); cache_info.lle_program = Compile(code); + if (Settings::values.dump_macros) { + Dump(cache_info.hash, code); + } } if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 7e12c16dc..07d97ba39 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -117,6 +117,9 @@ public: // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); + // Clear the code associated with a method. + void ClearCode(u32 method); + // Compiles the macro if its not in the cache, and executes the compiled macro void Execute(u32 method, const std::vector<u32>& parameters); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index dc2b490d4..dc5376501 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -23,7 +23,8 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255 namespace Tegra { namespace { constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; -constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; +constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; +constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -31,6 +32,7 @@ constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, + MAX_PARAMETER, PARAMETERS, METHOD_ADDRESS, BRANCH_HOLDER, @@ -80,7 +82,7 @@ private: u32 carry_flag{}; }; static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*); + using ProgramType = void (*)(JITState*, const u32*, const u32*); struct OptimizerState { bool can_skip_carry{}; @@ -112,7 +114,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { JITState state{}; state.maxwell3d = &maxwell3d; state.registers = {}; - program(&state, parameters.data()); + program(&state, parameters.data(), parameters.data() + parameters.size()); } void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { @@ -488,6 +490,7 @@ void MacroJITx64Impl::Compile() { // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, Common::X64::ABI_PARAM2); + mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); xor_(BRANCH_HOLDER, BRANCH_HOLDER); @@ -598,7 +601,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() { return true; } +static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) { + LOG_CRITICAL(HW_GPU, + "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", + parameter, max_parameter - sizeof(u32)); +} + Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { + Xbyak::Label parameter_ok{}; + cmp(PARAMETERS, MAX_PARAMETER); + jb(parameter_ok, T_NEAR); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, PARAMETERS); + mov(Common::X64::ABI_PARAM2, MAX_PARAMETER); + Common::X64::CallFarFunction(*this, &WarnInvalidParameter); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + L(parameter_ok); mov(eax, dword[PARAMETERS]); add(PARAMETERS, sizeof(u32)); return eax; diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index bd50f7a68..d6e8b5ead 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -53,6 +53,8 @@ void ConfigureDebug::SetConfiguration() { ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->dump_shaders->setEnabled(runtime_lock); ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue()); + ui->dump_macros->setEnabled(runtime_lock); + ui->dump_macros->setChecked(Settings::values.dump_macros.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); ui->disable_loop_safety_checks->setEnabled(runtime_lock); @@ -83,6 +85,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); Settings::values.dump_shaders = ui->dump_shaders->isChecked(); + Settings::values.dump_macros = ui->dump_macros->isChecked(); Settings::values.disable_shader_loop_safety_checks = ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index c1d90d588..863a3fd57 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -118,6 +118,19 @@ </property> </widget> </item> + <item row="0" column="2"> + <widget class="QCheckBox" name="dump_macros"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="toolTip"> + <string>When checked, it will dump all the macro programs of the GPU</string> + </property> + <property name="text"> + <string>Dump Maxwell Macros</string> + </property> + </widget> + </item> <item row="0" column="1"> <widget class="QCheckBox" name="disable_macro_jit"> <property name="enabled"> |