From dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 22 Apr 2023 22:55:03 -0400 Subject: x64: Deduplicate RDTSC usage --- src/common/x64/cpu_wait.cpp | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'src/common/x64/cpu_wait.cpp') diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,19 +9,11 @@ #include "common/x64/cpu_detect.h" #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h" namespace Common::X64 { #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} - __forceinline static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { _tpause(0, FencedRDTSC() + PauseCycles); } #else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} - static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: -- cgit v1.2.3 From 3d868baaa44152e7a4bd8c64905443fd9a08adce Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:07:59 -0400 Subject: x64: cpu_wait: Make use of MWAITX in MicroSleep MWAITX is equivalent to UMWAIT on Intel's Alder Lake CPUs. We can emulate TPAUSE by using MONITORX in conjunction with MWAITX to wait for 100K cycles. --- src/common/x64/cpu_wait.cpp | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'src/common/x64/cpu_wait.cpp') diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index c53dd4945..11b9c4d83 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -13,24 +13,30 @@ namespace Common::X64 { +namespace { + +// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. +// For reference: +// At 1 GHz, 100K cycles is 100us +// At 2 GHz, 100K cycles is 50us +// At 4 GHz, 100K cycles is 25us +constexpr auto PauseCycles = 100'000U; + +} // Anonymous namespace + #ifdef _MSC_VER __forceinline static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; _tpause(0, FencedRDTSC() + PauseCycles); } + +__forceinline static void MWAITX() { + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + _mm_monitorx(&monitor_var, 0, 0); + _mm_mwaitx(/* extensions*/ 2, /* hints */ 0, /* cycles */ PauseCycles); +} #else static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; const auto tsc = FencedRDTSC() + PauseCycles; const auto eax = static_cast(tsc & 0xFFFFFFFF); const auto edx = static_cast(tsc >> 32); @@ -40,9 +46,12 @@ static void TPAUSE() { void MicroSleep() { static const bool has_waitpkg = GetCPUCaps().waitpkg; + static const bool has_monitorx = GetCPUCaps().monitorx; if (has_waitpkg) { TPAUSE(); + } else if (has_monitorx) { + MWAITX(); } else { std::this_thread::yield(); } -- cgit v1.2.3 From 2b68a3cbbf144b97aa524eb1dd17aad34cdf1a67 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:19:41 -0400 Subject: x64: cpu_wait: Remove magic values --- src/common/x64/cpu_wait.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/common/x64/cpu_wait.cpp') diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index 11b9c4d83..ea16c8490 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -26,21 +26,26 @@ constexpr auto PauseCycles = 100'000U; #ifdef _MSC_VER __forceinline static void TPAUSE() { - _tpause(0, FencedRDTSC() + PauseCycles); + static constexpr auto RequestC02State = 0U; + _tpause(RequestC02State, FencedRDTSC() + PauseCycles); } __forceinline static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + // monitor_var should be aligned to a cache line. alignas(64) u64 monitor_var{}; _mm_monitorx(&monitor_var, 0, 0); - _mm_mwaitx(/* extensions*/ 2, /* hints */ 0, /* cycles */ PauseCycles); + _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); } #else static void TPAUSE() { + static constexpr auto RequestC02State = 0U; const auto tsc = FencedRDTSC() + PauseCycles; const auto eax = static_cast(tsc & 0xFFFFFFFF); const auto edx = static_cast(tsc >> 32); - asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); + asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); } #endif -- cgit v1.2.3 From 295fc7d0f8f0b6158307c5c9b11a60516f9eb221 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:35:16 -0400 Subject: x64: cpu_wait: Implement MWAITX for non-MSVC compilers --- src/common/x64/cpu_wait.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/common/x64/cpu_wait.cpp') diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index ea16c8490..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -47,6 +47,16 @@ static void TPAUSE() { const auto edx = static_cast(tsc >> 32); asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); } + +static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); + asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); +} #endif void MicroSleep() { -- cgit v1.2.3