From 16784e5bb3dd043f9430401097a4be42ad21eb91 Mon Sep 17 00:00:00 2001 From: merry Date: Sun, 27 Feb 2022 19:40:05 +0000 Subject: dynarmic: Inline exclusive memory accesses Inlines implementation of exclusive instructions into JITted code, improving performance of applications relying heavily on these instructions. We also fastmem these instructions for additional speed, with support for appropriate recompilation on fastmem failure. An unsafe optimization to disable the intercore global_monitor is also provided, should one wish to rely solely on cmpxchg semantics for safety. See also: merryhime/dynarmic#664 --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 12 ++++++++++++ src/core/arm/dynarmic/arm_dynarmic_64.cpp | 13 +++++++++++++ src/core/arm/dynarmic/arm_exclusive_monitor.cpp | 4 ++-- src/core/arm/dynarmic/arm_exclusive_monitor.h | 2 +- src/core/arm/exclusive_monitor.h | 2 +- src/core/hle/kernel/k_address_arbiter.cpp | 4 ++-- 6 files changed, 31 insertions(+), 6 deletions(-) (limited to 'src/core') diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b0d89c539..286976623 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -137,6 +137,8 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.only_detect_misalignment_via_page_table_on_page_boundary = true; + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; // Multi-process state config.processor_id = core_index; @@ -178,6 +180,12 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -195,6 +203,9 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -203,6 +214,7 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_unique(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 56836bd05..d96226c41 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -185,6 +185,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* config.fastmem_pointer = page_table->fastmem_arena; config.fastmem_address_space_bits = address_space_bits; config.silently_mirror_fastmem = false; + + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; } // Multi-process state @@ -237,6 +240,12 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -254,6 +263,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_fastmem_check) { config.fastmem_address_space_bits = 64; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -262,6 +274,7 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; config.fastmem_address_space_bits = 64; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_shared(config); diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp index 397d054a8..ea6b224e0 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad }); } -void DynarmicExclusiveMonitor::ClearExclusive() { - monitor.Clear(); +void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) { + monitor.ClearProcessor(core_index); } bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h index 265c4ecef..5a15b43ef 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.h +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -29,7 +29,7 @@ public: u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; - void ClearExclusive() override; + void ClearExclusive(std::size_t core_index) override; bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index 62f6e6023..9914ca3da 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -23,7 +23,7 @@ public: virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0; - virtual void ClearExclusive() = 0; + virtual void ClearExclusive(std::size_t core_index) = 0; virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0; diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 783c69858..1d1f5e5f8 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu } } else { // Otherwise, clear our exclusive hold and finish - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. @@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 } } else { // Otherwise, clear our exclusive hold and finish. - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. -- cgit v1.2.3