From bd09c825218aac9255643b9aa7c75f5ba156038c Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 19:17:50 -0500 Subject: common: Implement a high resolution steady clock This implementation provides a consistent, high performance, and high resolution clock where/when std::chrono::steady_clock does not provide sufficient precision. --- src/common/CMakeLists.txt | 2 ++ src/common/steady_clock.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++++ src/common/steady_clock.h | 23 +++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 src/common/steady_clock.cpp create mode 100644 src/common/steady_clock.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 56b247ac4..9f5d4c265 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -113,6 +113,8 @@ add_library(common STATIC socket_types.h spin_lock.cpp spin_lock.h + steady_clock.cpp + steady_clock.h stream.cpp stream.h string_util.cpp diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp new file mode 100644 index 000000000..0d5908aa7 --- /dev/null +++ b/src/common/steady_clock.cpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#if defined(_WIN32) +#include +#else +#include +#endif + +#include "common/steady_clock.h" + +namespace Common { + +#ifdef _WIN32 +static s64 WindowsQueryPerformanceFrequency() { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return frequency.QuadPart; +} + +static s64 WindowsQueryPerformanceCounter() { + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); + return counter.QuadPart; +} +#endif + +SteadyClock::time_point SteadyClock::Now() noexcept { +#if defined(_WIN32) + static const auto freq = WindowsQueryPerformanceFrequency(); + const auto counter = WindowsQueryPerformanceCounter(); + + // 10 MHz is a very common QPC frequency on modern PCs. + // Optimizing for this specific frequency can double the performance of + // this function by avoiding the expensive frequency conversion path. + static constexpr s64 TenMHz = 10'000'000; + + if (freq == TenMHz) [[likely]] { + static_assert(period::den % TenMHz == 0); + static constexpr s64 Multiplier = period::den / TenMHz; + return time_point{duration{counter * Multiplier}}; + } + + const auto whole = (counter / freq) * period::den; + const auto part = (counter % freq) * period::den / freq; + return time_point{duration{whole + part}}; +#elif defined(__APPLE__) + return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}}; +#else + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return time_point{std::chrono::seconds{ts.tv_sec} + std::chrono::nanoseconds{ts.tv_nsec}}; +#endif +} + +}; // namespace Common diff --git a/src/common/steady_clock.h b/src/common/steady_clock.h new file mode 100644 index 000000000..9497cf865 --- /dev/null +++ b/src/common/steady_clock.h @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Common { + +struct SteadyClock { + using rep = s64; + using period = std::nano; + using duration = std::chrono::nanoseconds; + using time_point = std::chrono::time_point; + + static constexpr bool is_steady = true; + + [[nodiscard]] static time_point Now() noexcept; +}; + +} // namespace Common -- cgit v1.2.3 From 1ed49f92dd56289e6e31a967e602c65ccedd4ff1 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 19:27:10 -0500 Subject: common: Implement a method to change the Windows timer resolution This utilizes undocumented NtDll functions to change the current timer resolution from the default of 1ms. --- src/common/CMakeLists.txt | 8 +++ src/common/windows/timer_resolution.cpp | 87 +++++++++++++++++++++++++++++++++ src/common/windows/timer_resolution.h | 38 ++++++++++++++ 3 files changed, 133 insertions(+) create mode 100644 src/common/windows/timer_resolution.cpp create mode 100644 src/common/windows/timer_resolution.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9f5d4c265..58ff5f2f3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -144,6 +144,14 @@ add_library(common STATIC zstd_compression.h ) +if (WIN32) + target_sources(common PRIVATE + windows/timer_resolution.cpp + windows/timer_resolution.h + ) + target_link_libraries(common PRIVATE ntdll) +endif() + if(ARCHITECTURE_x86_64) target_sources(common PRIVATE diff --git a/src/common/windows/timer_resolution.cpp b/src/common/windows/timer_resolution.cpp new file mode 100644 index 000000000..6c2063a4c --- /dev/null +++ b/src/common/windows/timer_resolution.cpp @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/windows/timer_resolution.h" + +extern "C" { +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html +NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution, + PULONG CurrentResolution); + +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html +NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution, + PULONG CurrentResolution); + +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html +NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval); +} + +namespace Common::Windows { + +namespace { + +using namespace std::chrono; + +constexpr nanoseconds ToNS(ULONG hundred_ns) { + return nanoseconds{hundred_ns * 100}; +} + +constexpr ULONG ToHundredNS(nanoseconds ns) { + return static_cast(ns.count()) / 100; +} + +struct TimerResolution { + std::chrono::nanoseconds minimum; + std::chrono::nanoseconds maximum; + std::chrono::nanoseconds current; +}; + +TimerResolution GetTimerResolution() { + ULONG MinimumTimerResolution; + ULONG MaximumTimerResolution; + ULONG CurrentTimerResolution; + NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution, + &CurrentTimerResolution); + return { + .minimum{ToNS(MinimumTimerResolution)}, + .maximum{ToNS(MaximumTimerResolution)}, + .current{ToNS(CurrentTimerResolution)}, + }; +} + +} // Anonymous namespace + +nanoseconds GetMinimumTimerResolution() { + return GetTimerResolution().minimum; +} + +nanoseconds GetMaximumTimerResolution() { + return GetTimerResolution().maximum; +} + +nanoseconds GetCurrentTimerResolution() { + return GetTimerResolution().current; +} + +nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) { + // Set the timer resolution, and return the current timer resolution. + const auto DesiredTimerResolution = ToHundredNS(timer_resolution); + ULONG CurrentTimerResolution; + NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution); + return ToNS(CurrentTimerResolution); +} + +nanoseconds SetCurrentTimerResolutionToMaximum() { + return SetCurrentTimerResolution(GetMaximumTimerResolution()); +} + +void SleepForOneTick() { + LARGE_INTEGER DelayInterval{ + .QuadPart{-1}, + }; + NtDelayExecution(FALSE, &DelayInterval); +} + +} // namespace Common::Windows diff --git a/src/common/windows/timer_resolution.h b/src/common/windows/timer_resolution.h new file mode 100644 index 000000000..e1e50a62d --- /dev/null +++ b/src/common/windows/timer_resolution.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Common::Windows { + +/// Returns the minimum (least precise) supported timer resolution in nanoseconds. +std::chrono::nanoseconds GetMinimumTimerResolution(); + +/// Returns the maximum (most precise) supported timer resolution in nanoseconds. +std::chrono::nanoseconds GetMaximumTimerResolution(); + +/// Returns the current timer resolution in nanoseconds. +std::chrono::nanoseconds GetCurrentTimerResolution(); + +/** + * Sets the current timer resolution. + * + * @param timer_resolution Timer resolution in nanoseconds. + * + * @returns The current timer resolution. + */ +std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution); + +/** + * Sets the current timer resolution to the maximum supported timer resolution. + * + * @returns The current timer resolution. + */ +std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum(); + +/// Sleep for one tick of the current timer resolution. +void SleepForOneTick(); + +} // namespace Common::Windows -- cgit v1.2.3 From 7fffdf83b70ec5f0ead804cb2c16b9029f7e0cfa Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 19:43:00 -0500 Subject: wall_clock: Make use of SteadyClock --- src/common/wall_clock.cpp | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index ae07f2811..6d972d136 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/steady_clock.h" #include "common/uint128.h" #include "common/wall_clock.h" @@ -11,45 +12,32 @@ namespace Common { -using base_timer = std::chrono::steady_clock; -using base_time_point = std::chrono::time_point; - class StandardWallClock final : public WallClock { public: explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { - start_time = base_timer::now(); - } + : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, + start_time{SteadyClock::Now()} {} std::chrono::nanoseconds GetTimeNS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return SteadyClock::Now() - start_time; } std::chrono::microseconds GetTimeUS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return std::chrono::duration_cast(GetTimeNS()); } std::chrono::milliseconds GetTimeMS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return std::chrono::duration_cast(GetTimeNS()); } u64 GetClockCycles() override { - std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporary = - Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); - return Common::Divide128On32(temporary, 1000000000).first; + const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); + return Common::Divide128On32(temp, NS_RATIO).first; } u64 GetCPUCycles() override { - std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); - return Common::Divide128On32(temporary, 1000000000).first; + const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); + return Common::Divide128On32(temp, NS_RATIO).first; } void Pause([[maybe_unused]] bool is_paused) override { @@ -57,7 +45,7 @@ public: } private: - base_time_point start_time; + SteadyClock::time_point start_time; }; #ifdef ARCHITECTURE_x86_64 -- cgit v1.2.3 From 7e353082ac79bef59d48602f9196cf804d3dfc4f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 20:07:59 -0500 Subject: main: (Windows) Set the current timer resolution to the maximum Increases the precision of thread sleeps on Windows. --- src/yuzu/main.cpp | 9 +++++++++ src/yuzu_cmd/yuzu.cpp | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index f233b065e..c092507f4 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -91,6 +91,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "common/microprofile.h" #include "common/scm_rev.h" #include "common/scope_exit.h" +#ifdef _WIN32 +#include "common/windows/timer_resolution.h" +#endif #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #endif @@ -377,6 +380,12 @@ GMainWindow::GMainWindow(std::unique_ptr config_, bool has_broken_vulkan LOG_INFO(Frontend, "Host RAM: {:.2f} GiB", Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB}); LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB}); +#ifdef _WIN32 + LOG_INFO(Frontend, "Host Timer Resolution: {:.4f} ms", + std::chrono::duration_cast>( + Common::Windows::SetCurrentTimerResolutionToMaximum()) + .count()); +#endif UpdateWindowTitle(); show(); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 77edd58ca..5f39ece32 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -42,6 +42,8 @@ #include #include + +#include "common/windows/timer_resolution.h" #endif #undef _UNICODE @@ -314,6 +316,8 @@ int main(int argc, char** argv) { #ifdef _WIN32 LocalFree(argv_w); + + Common::Windows::SetCurrentTimerResolutionToMaximum(); #endif MicroProfileOnThreadCreate("EmuThread"); -- cgit v1.2.3 From bff14532825e7517882ca913738347059f73cf7f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 21:06:19 -0500 Subject: core_timing: Use higher precision sleeps on Windows The precision of sleep_for and wait_for is limited to 1-1.5ms on Windows. Using SleepForOneTick() allows us to sleep for exactly one interval of the current timer resolution. This allows us to take advantage of systems that have a timer resolution of 0.5ms to reduce CPU overhead in the event loop. --- src/common/wall_clock.cpp | 5 +++++ src/common/wall_clock.h | 3 +++ src/core/core_timing.cpp | 55 ++++++++++++++++++++++++++++------------------- src/core/core_timing.h | 6 +++++- src/video_core/gpu.cpp | 2 +- 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 6d972d136..817e71d52 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -81,4 +81,9 @@ std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, #endif +std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency) { + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); +} + } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 828a523a8..157ec5eae 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -55,4 +55,7 @@ private: [[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency); +[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency); + } // namespace Common diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 3a63b52e3..742cfb996 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -6,6 +6,10 @@ #include #include +#ifdef _WIN32 +#include "common/windows/timer_resolution.h" +#endif + #include "common/microprofile.h" #include "core/core_timing.h" #include "core/core_timing_util.h" @@ -38,7 +42,8 @@ struct CoreTiming::Event { }; CoreTiming::CoreTiming() - : clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} + : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, + event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} CoreTiming::~CoreTiming() { Reset(); @@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() { } u64 CoreTiming::GetCPUTicks() const { - if (is_multicore) { - return clock->GetCPUCycles(); + if (is_multicore) [[likely]] { + return cpu_clock->GetCPUCycles(); } return ticks; } u64 CoreTiming::GetClockTicks() const { - if (is_multicore) { - return clock->GetClockCycles(); + if (is_multicore) [[likely]] { + return cpu_clock->GetClockCycles(); } return CpuCyclesToClockCycles(ticks); } @@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() { const auto next_time = Advance(); if (next_time) { // There are more events left in the queue, wait until the next event. - const auto wait_time = *next_time - GetGlobalTimeNs().count(); + auto wait_time = *next_time - GetGlobalTimeNs().count(); if (wait_time > 0) { #ifdef _WIN32 - // Assume a timer resolution of 1ms. - static constexpr s64 TimerResolutionNS = 1000000; + const auto timer_resolution_ns = + Common::Windows::GetCurrentTimerResolution().count(); - // Sleep in discrete intervals of the timer resolution, and spin the rest. - const auto sleep_time = wait_time - (wait_time % TimerResolutionNS); - if (sleep_time > 0) { - event.WaitFor(std::chrono::nanoseconds(sleep_time)); - } + while (!paused && !event.IsSet() && wait_time > 0) { + wait_time = *next_time - GetGlobalTimeNs().count(); - while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) { - // Yield to reduce thread starvation. - std::this_thread::yield(); + if (wait_time >= timer_resolution_ns) { + Common::Windows::SleepForOneTick(); + } else { + std::this_thread::yield(); + } } if (event.IsSet()) { @@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() { } paused_set = true; - clock->Pause(true); + event_clock->Pause(true); pause_event.Wait(); - clock->Pause(false); + event_clock->Pause(false); } } @@ -303,16 +307,23 @@ void CoreTiming::Reset() { has_started = false; } +std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { + if (is_multicore) [[likely]] { + return cpu_clock->GetTimeNS(); + } + return CyclesToNs(ticks); +} + std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { - if (is_multicore) { - return clock->GetTimeNS(); + if (is_multicore) [[likely]] { + return event_clock->GetTimeNS(); } return CyclesToNs(ticks); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - if (is_multicore) { - return clock->GetTimeUS(); + if (is_multicore) [[likely]] { + return event_clock->GetTimeUS(); } return CyclesToUs(ticks); } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index da366637b..4b89c0c39 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -122,6 +122,9 @@ public: /// Returns current time in emulated in Clock cycles u64 GetClockTicks() const; + /// Returns current time in nanoseconds. + std::chrono::nanoseconds GetCPUTimeNs() const; + /// Returns current time in microseconds. std::chrono::microseconds GetGlobalTimeUs() const; @@ -139,7 +142,8 @@ private: void Reset(); - std::unique_ptr clock; + std::unique_ptr cpu_clock; + std::unique_ptr event_clock; s64 global_timer = 0; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7024a19cf..2e7f9c5ed 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -197,7 +197,7 @@ struct GPU::Impl { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); if (Settings::values.use_fast_gpu_time.GetValue()) { nanoseconds /= 256; } -- cgit v1.2.3 From 194cf0b4974f088dbcf0df879e44e767b3ab4450 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 3 Mar 2023 20:43:51 -0500 Subject: hardware_properties: Update BASE_CLOCK_RATE to exactly 1020 MHz --- src/core/hardware_properties.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h index 45567b840..191c28bb4 100644 --- a/src/core/hardware_properties.h +++ b/src/core/hardware_properties.h @@ -13,11 +13,9 @@ namespace Core { namespace Hardware { -// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz -// The exact value used is of course unverified. -constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked -constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed -constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores +constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz +constexpr u64 CNTFREQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz +constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores // Virtual to Physical core map. constexpr std::array()> VirtualToPhysicalCoreMap{ -- cgit v1.2.3 From 3453beb1e0fa7af3db4e07418f3ea8d997d005be Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 3 Mar 2023 20:48:15 -0500 Subject: general: Target Windows 10 SDK We no longer support operating systems below Windows 10. --- CMakeLists.txt | 4 ++-- dist/yuzu.manifest | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f26a0c6b8..91ec50bef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,8 +477,8 @@ if (APPLE) find_library(COCOA_LIBRARY Cocoa) set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY}) elseif (WIN32) - # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista) - add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600) + # Target Windows 10 + add_definitions(-D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00) set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi) if (MINGW) # PSAPI is the Process Status API diff --git a/dist/yuzu.manifest b/dist/yuzu.manifest index 10a8df9b5..f2c8639a2 100644 --- a/dist/yuzu.manifest +++ b/dist/yuzu.manifest @@ -36,12 +36,6 @@ SPDX-License-Identifier: GPL-2.0-or-later - - - - - - Date: Fri, 3 Mar 2023 20:54:15 -0500 Subject: timer_resolution: Set current process to High QoS Ensures that this process is treated as a high performance process by the Windows scheduler. --- src/common/windows/timer_resolution.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/common/windows/timer_resolution.cpp b/src/common/windows/timer_resolution.cpp index 6c2063a4c..29c6e5c7e 100644 --- a/src/common/windows/timer_resolution.cpp +++ b/src/common/windows/timer_resolution.cpp @@ -18,6 +18,15 @@ NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetRes NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval); } +// Defines for compatibility with older Windows 10 SDKs. + +#ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED +#define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1 +#endif +#ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION +#define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4 +#endif + namespace Common::Windows { namespace { @@ -51,6 +60,18 @@ TimerResolution GetTimerResolution() { }; } +void SetHighQoS() { + // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service + PROCESS_POWER_THROTTLING_STATE PowerThrottling{ + .Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION}, + .ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED | + PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION}, + .StateMask{}, + }; + SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling, + sizeof(PROCESS_POWER_THROTTLING_STATE)); +} + } // Anonymous namespace nanoseconds GetMinimumTimerResolution() { @@ -74,6 +95,7 @@ nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) { } nanoseconds SetCurrentTimerResolutionToMaximum() { + SetHighQoS(); return SetCurrentTimerResolution(GetMaximumTimerResolution()); } -- cgit v1.2.3 From 376a414f5bc6d27e5e0fbda323caf5520436bf39 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 3 Mar 2023 21:02:24 -0500 Subject: native_clock: Round RDTSC frequency to the nearest 1000 --- src/common/x64/native_clock.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 8b08332ab..bc1a973b0 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -6,6 +6,7 @@ #include #include "common/atomic_ops.h" +#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" @@ -39,6 +40,12 @@ static u64 FencedRDTSC() { } #endif +template +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + u64 EstimateRDTSCFrequency() { // Discard the first result measuring the rdtsc. FencedRDTSC(); @@ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() { FencedRDTSC(); // Get the current time. - const auto start_time = std::chrono::steady_clock::now(); + const auto start_time = Common::SteadyClock::Now(); const u64 tsc_start = FencedRDTSC(); - // Wait for 200 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{200}); - const auto end_time = std::chrono::steady_clock::now(); + // Wait for 250 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{250}); + const auto end_time = Common::SteadyClock::Now(); const u64 tsc_end = FencedRDTSC(); // Calculate differences. const u64 timer_diff = static_cast( std::chrono::duration_cast(end_time - start_time).count()); const u64 tsc_diff = tsc_end - tsc_start; const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return tsc_freq; + return RoundToNearest<1000>(tsc_freq); } namespace X64 { -- cgit v1.2.3