diff options
Diffstat (limited to '')
-rw-r--r-- | src/common/assert.cpp | 7 | ||||
-rw-r--r-- | src/common/assert.h | 55 | ||||
-rw-r--r-- | src/common/bounded_threadsafe_queue.h | 159 | ||||
-rw-r--r-- | src/common/detached_tasks.cpp | 4 | ||||
-rw-r--r-- | src/common/page_table.h | 3 | ||||
-rw-r--r-- | src/common/param_package.cpp | 6 | ||||
-rw-r--r-- | src/common/settings.cpp | 2 | ||||
-rw-r--r-- | src/common/thread.cpp | 12 | ||||
-rw-r--r-- | src/common/thread.h | 1 | ||||
-rw-r--r-- | src/common/uint128.h | 5 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 5 | ||||
-rw-r--r-- | src/common/x64/native_clock.h | 6 |
12 files changed, 132 insertions, 133 deletions
diff --git a/src/common/assert.cpp b/src/common/assert.cpp index b44570528..6026b7dc2 100644 --- a/src/common/assert.cpp +++ b/src/common/assert.cpp @@ -6,8 +6,13 @@ #include "common/settings.h" -void assert_handle_failure() { +void assert_fail_impl() { if (Settings::values.use_debug_asserts) { Crash(); } } + +[[noreturn]] void unreachable_impl() { + Crash(); + throw std::runtime_error("Unreachable code"); +} diff --git a/src/common/assert.h b/src/common/assert.h index dbfd8abaf..8c927fcc0 100644 --- a/src/common/assert.h +++ b/src/common/assert.h @@ -9,44 +9,43 @@ // Sometimes we want to try to continue even after hitting an assert. // However touching this file yields a global recompilation as this header is included almost // everywhere. So let's just move the handling of the failed assert to a single cpp file. -void assert_handle_failure(); -// For asserts we'd like to keep all the junk executed when an assert happens away from the -// important code in the function. One way of doing this is to put all the relevant code inside a -// lambda and force the compiler to not inline it. Unfortunately, MSVC seems to have no syntax to -// specify __declspec on lambda functions, so what we do instead is define a noinline wrapper -// template that calls the lambda. This seems to generate an extra instruction at the call-site -// compared to the ideal implementation (which wouldn't support ASSERT_MSG parameters), but is good -// enough for our purposes. 
-template <typename Fn> -#if defined(_MSC_VER) -[[msvc::noinline]] -#elif defined(__GNUC__) -[[gnu::cold, gnu::noinline]] +void assert_fail_impl(); +[[noreturn]] void unreachable_impl(); + +#ifdef _MSC_VER +#define YUZU_NO_INLINE __declspec(noinline) +#else +#define YUZU_NO_INLINE __attribute__((noinline)) #endif -static void -assert_noinline_call(const Fn& fn) { - fn(); - assert_handle_failure(); -} #define ASSERT(_a_) \ - do \ - if (!(_a_)) { \ - assert_noinline_call([] { LOG_CRITICAL(Debug, "Assertion Failed!"); }); \ + ([&]() YUZU_NO_INLINE { \ + if (!(_a_)) [[unlikely]] { \ + LOG_CRITICAL(Debug, "Assertion Failed!"); \ + assert_fail_impl(); \ } \ - while (0) + }()) #define ASSERT_MSG(_a_, ...) \ - do \ - if (!(_a_)) { \ - assert_noinline_call([&] { LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); }); \ + ([&]() YUZU_NO_INLINE { \ + if (!(_a_)) [[unlikely]] { \ + LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); \ + assert_fail_impl(); \ } \ - while (0) + }()) + +#define UNREACHABLE() \ + do { \ + LOG_CRITICAL(Debug, "Unreachable code!"); \ + unreachable_impl(); \ + } while (0) -#define UNREACHABLE() assert_noinline_call([] { LOG_CRITICAL(Debug, "Unreachable code!"); }) #define UNREACHABLE_MSG(...) 
\ - assert_noinline_call([&] { LOG_CRITICAL(Debug, "Unreachable code!\n" __VA_ARGS__); }) + do { \ + LOG_CRITICAL(Debug, "Unreachable code!\n" __VA_ARGS__); \ + unreachable_impl(); \ + } while (0) #ifdef _DEBUG #define DEBUG_ASSERT(_a_) ASSERT(_a_) diff --git a/src/common/bounded_threadsafe_queue.h b/src/common/bounded_threadsafe_queue.h index e83064c7f..7e465549b 100644 --- a/src/common/bounded_threadsafe_queue.h +++ b/src/common/bounded_threadsafe_queue.h @@ -1,10 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2020 Erik Rigtorp <erik@rigtorp.se> // SPDX-License-Identifier: MIT + #pragma once -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) -#endif #include <atomic> #include <bit> @@ -12,105 +9,63 @@ #include <memory> #include <mutex> #include <new> -#include <stdexcept> #include <stop_token> #include <type_traits> #include <utility> namespace Common { -namespace mpsc { + #if defined(__cpp_lib_hardware_interference_size) constexpr size_t hardware_interference_size = std::hardware_destructive_interference_size; #else constexpr size_t hardware_interference_size = 64; #endif -template <typename T> -using AlignedAllocator = std::allocator<T>; - -template <typename T> -struct Slot { - ~Slot() noexcept { - if (turn.test()) { - destroy(); - } - } - - template <typename... Args> - void construct(Args&&... 
args) noexcept { - static_assert(std::is_nothrow_constructible_v<T, Args&&...>, - "T must be nothrow constructible with Args&&..."); - std::construct_at(reinterpret_cast<T*>(&storage), std::forward<Args>(args)...); - } - - void destroy() noexcept { - static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible"); - std::destroy_at(reinterpret_cast<T*>(&storage)); - } - - T&& move() noexcept { - return reinterpret_cast<T&&>(storage); - } - - // Align to avoid false sharing between adjacent slots - alignas(hardware_interference_size) std::atomic_flag turn{}; - struct aligned_store { - struct type { - alignas(T) unsigned char data[sizeof(T)]; - }; - }; - typename aligned_store::type storage; -}; +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4324) +#endif -template <typename T, typename Allocator = AlignedAllocator<Slot<T>>> -class Queue { +template <typename T, size_t capacity = 0x400> +class MPSCQueue { public: - explicit Queue(const size_t capacity, const Allocator& allocator = Allocator()) - : allocator_(allocator) { - if (capacity < 1) { - throw std::invalid_argument("capacity < 1"); - } - // Ensure that the queue length is an integer power of 2 - // This is so that idx(i) can be a simple i & mask_ insted of i % capacity - // https://github.com/rigtorp/MPMCQueue/pull/36 - if (!std::has_single_bit(capacity)) { - throw std::invalid_argument("capacity must be an integer power of 2"); - } - - mask_ = capacity - 1; - + explicit MPSCQueue() : allocator{std::allocator<Slot<T>>()} { // Allocate one extra slot to prevent false sharing on the last slot - slots_ = allocator_.allocate(mask_ + 2); + slots = allocator.allocate(capacity + 1); // Allocators are not required to honor alignment for over-aligned types // (see http://eel.is/c++draft/allocator.requirements#10) so we verify // alignment here - if (reinterpret_cast<uintptr_t>(slots_) % alignof(Slot<T>) != 0) { - allocator_.deallocate(slots_, mask_ + 2); + if 
(reinterpret_cast<uintptr_t>(slots) % alignof(Slot<T>) != 0) { + allocator.deallocate(slots, capacity + 1); throw std::bad_alloc(); } - for (size_t i = 0; i < mask_ + 1; ++i) { - std::construct_at(&slots_[i]); + for (size_t i = 0; i < capacity; ++i) { + std::construct_at(&slots[i]); } + static_assert(std::has_single_bit(capacity), "capacity must be an integer power of 2"); static_assert(alignof(Slot<T>) == hardware_interference_size, "Slot must be aligned to cache line boundary to prevent false sharing"); static_assert(sizeof(Slot<T>) % hardware_interference_size == 0, "Slot size must be a multiple of cache line size to prevent " "false sharing between adjacent slots"); - static_assert(sizeof(Queue) % hardware_interference_size == 0, + static_assert(sizeof(MPSCQueue) % hardware_interference_size == 0, "Queue size must be a multiple of cache line size to " "prevent false sharing between adjacent queues"); } - ~Queue() noexcept { - for (size_t i = 0; i < mask_ + 1; ++i) { - slots_[i].~Slot(); + ~MPSCQueue() noexcept { + for (size_t i = 0; i < capacity; ++i) { + std::destroy_at(&slots[i]); } - allocator_.deallocate(slots_, mask_ + 2); + allocator.deallocate(slots, capacity + 1); } - // non-copyable and non-movable - Queue(const Queue&) = delete; - Queue& operator=(const Queue&) = delete; + // The queue must be both non-copyable and non-movable + MPSCQueue(const MPSCQueue&) = delete; + MPSCQueue& operator=(const MPSCQueue&) = delete; + + MPSCQueue(MPSCQueue&&) = delete; + MPSCQueue& operator=(MPSCQueue&&) = delete; void Push(const T& v) noexcept { static_assert(std::is_nothrow_copy_constructible_v<T>, @@ -125,8 +80,8 @@ public: void Pop(T& v, std::stop_token stop) noexcept { auto const tail = tail_.fetch_add(1); - auto& slot = slots_[idx(tail)]; - if (false == slot.turn.test()) { + auto& slot = slots[idx(tail)]; + if (!slot.turn.test()) { std::unique_lock lock{cv_mutex}; cv.wait(lock, stop, [&slot] { return slot.turn.test(); }); } @@ -137,12 +92,46 @@ public: } 
private: + template <typename U = T> + struct Slot { + ~Slot() noexcept { + if (turn.test()) { + destroy(); + } + } + + template <typename... Args> + void construct(Args&&... args) noexcept { + static_assert(std::is_nothrow_constructible_v<U, Args&&...>, + "T must be nothrow constructible with Args&&..."); + std::construct_at(reinterpret_cast<U*>(&storage), std::forward<Args>(args)...); + } + + void destroy() noexcept { + static_assert(std::is_nothrow_destructible_v<U>, "T must be nothrow destructible"); + std::destroy_at(reinterpret_cast<U*>(&storage)); + } + + U&& move() noexcept { + return reinterpret_cast<U&&>(storage); + } + + // Align to avoid false sharing between adjacent slots + alignas(hardware_interference_size) std::atomic_flag turn{}; + struct aligned_store { + struct type { + alignas(U) unsigned char data[sizeof(U)]; + }; + }; + typename aligned_store::type storage; + }; + template <typename... Args> void emplace(Args&&... args) noexcept { static_assert(std::is_nothrow_constructible_v<T, Args&&...>, "T must be nothrow constructible with Args&&..."); auto const head = head_.fetch_add(1); - auto& slot = slots_[idx(head)]; + auto& slot = slots[idx(head)]; slot.turn.wait(true); slot.construct(std::forward<Args>(args)...); slot.turn.test_and_set(); @@ -150,31 +139,29 @@ private: } constexpr size_t idx(size_t i) const noexcept { - return i & mask_; + return i & mask; } - std::conditional_t<true, std::condition_variable_any, std::condition_variable> cv; - std::mutex cv_mutex; - size_t mask_; - Slot<T>* slots_; - [[no_unique_address]] Allocator allocator_; + static constexpr size_t mask = capacity - 1; // Align to avoid false sharing between head_ and tail_ alignas(hardware_interference_size) std::atomic<size_t> head_{0}; alignas(hardware_interference_size) std::atomic<size_t> tail_{0}; + std::mutex cv_mutex; + std::condition_variable_any cv; + + Slot<T>* slots; + [[no_unique_address]] std::allocator<Slot<T>> allocator; + 
static_assert(std::is_nothrow_copy_assignable_v<T> || std::is_nothrow_move_assignable_v<T>, "T must be nothrow copy or move assignable"); static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible"); }; -} // namespace mpsc - -template <typename T, typename Allocator = mpsc::AlignedAllocator<mpsc::Slot<T>>> -using MPSCQueue = mpsc::Queue<T, Allocator>; - -} // namespace Common #ifdef _MSC_VER #pragma warning(pop) #endif + +} // namespace Common diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp index c1362631e..ec31d0b88 100644 --- a/src/common/detached_tasks.cpp +++ b/src/common/detached_tasks.cpp @@ -33,9 +33,9 @@ void DetachedTasks::AddTask(std::function<void()> task) { ++instance->count; std::thread([task{std::move(task)}]() { task(); - std::unique_lock lock{instance->mutex}; + std::unique_lock thread_lock{instance->mutex}; --instance->count; - std::notify_all_at_thread_exit(instance->cv, std::move(lock)); + std::notify_all_at_thread_exit(instance->cv, std::move(thread_lock)); }).detach(); } diff --git a/src/common/page_table.h b/src/common/page_table.h index fcbd12a43..1ad3a9f8b 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h @@ -15,6 +15,9 @@ enum class PageType : u8 { Unmapped, /// Page is mapped to regular memory. This is the only type you can get pointers to. Memory, + /// Page is mapped to regular memory, but inaccessible from CPU fastmem and must use + /// the callbacks. 
+ DebugMemory, /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and /// invalidation RasterizerCachedMemory, diff --git a/src/common/param_package.cpp b/src/common/param_package.cpp index bbf20f5eb..462502e34 100644 --- a/src/common/param_package.cpp +++ b/src/common/param_package.cpp @@ -76,7 +76,7 @@ std::string ParamPackage::Serialize() const { std::string ParamPackage::Get(const std::string& key, const std::string& default_value) const { auto pair = data.find(key); if (pair == data.end()) { - LOG_DEBUG(Common, "key '{}' not found", key); + LOG_TRACE(Common, "key '{}' not found", key); return default_value; } @@ -86,7 +86,7 @@ std::string ParamPackage::Get(const std::string& key, const std::string& default int ParamPackage::Get(const std::string& key, int default_value) const { auto pair = data.find(key); if (pair == data.end()) { - LOG_DEBUG(Common, "key '{}' not found", key); + LOG_TRACE(Common, "key '{}' not found", key); return default_value; } @@ -101,7 +101,7 @@ int ParamPackage::Get(const std::string& key, int default_value) const { float ParamPackage::Get(const std::string& key, float default_value) const { auto pair = data.find(key); if (pair == data.end()) { - LOG_DEBUG(Common, "key {} not found", key); + LOG_TRACE(Common, "key {} not found", key); return default_value; } diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 6ffab63af..751549583 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -147,7 +147,7 @@ void UpdateRescalingInfo() { info.down_shift = 0; break; default: - UNREACHABLE(); + ASSERT(false); info.up_scale = 1; info.down_shift = 0; } diff --git a/src/common/thread.cpp b/src/common/thread.cpp index f932a7290..919e33af9 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -47,6 +47,9 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { case ThreadPriority::VeryHigh: windows_priority = THREAD_PRIORITY_HIGHEST; break; + case 
ThreadPriority::Critical: + windows_priority = THREAD_PRIORITY_TIME_CRITICAL; + break; default: windows_priority = THREAD_PRIORITY_NORMAL; break; @@ -59,9 +62,10 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { void SetCurrentThreadPriority(ThreadPriority new_priority) { pthread_t this_thread = pthread_self(); - s32 max_prio = sched_get_priority_max(SCHED_OTHER); - s32 min_prio = sched_get_priority_min(SCHED_OTHER); - u32 level = static_cast<u32>(new_priority) + 1; + const auto scheduling_type = SCHED_OTHER; + s32 max_prio = sched_get_priority_max(scheduling_type); + s32 min_prio = sched_get_priority_min(scheduling_type); + u32 level = std::max(static_cast<u32>(new_priority) + 1, 4U); struct sched_param params; if (max_prio > min_prio) { @@ -70,7 +74,7 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; } - pthread_setschedparam(this_thread, SCHED_OTHER, &params); + pthread_setschedparam(this_thread, scheduling_type, &params); } #endif diff --git a/src/common/thread.h b/src/common/thread.h index a63122516..1552f58e0 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -92,6 +92,7 @@ enum class ThreadPriority : u32 { Normal = 1, High = 2, VeryHigh = 3, + Critical = 4, }; void SetCurrentThreadPriority(ThreadPriority new_priority); diff --git a/src/common/uint128.h b/src/common/uint128.h index f890ffec2..199d0f55e 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -31,12 +31,17 @@ namespace Common { return _udiv128(r[1], r[0], d, &remainder); #endif #else +#ifdef __SIZEOF_INT128__ + const auto product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b); + return static_cast<u64>(product / d); +#else const u64 diva = a / d; const u64 moda = a % d; const u64 divb = b / d; const u64 modb = b % d; return diva * b + moda * divb + moda * modb / d; #endif +#endif } // This function multiplies 2 u64 values and produces a u128 value; diff --git
a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 1b7194503..6aaa8cdf9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -75,8 +75,8 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen } u64 NativeClock::GetRTSC() { - TimePoint new_time_point{}; TimePoint current_time_point{}; + TimePoint new_time_point{}; current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { @@ -89,8 +89,7 @@ u64 NativeClock::GetRTSC() { new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, current_time_point.pack, current_time_point.pack)); - /// The clock cannot be more precise than the guest timer, remove the lower bits - return new_time_point.inner.accumulated_ticks & inaccuracy_mask; + return new_time_point.inner.accumulated_ticks; } void NativeClock::Pause(bool is_paused) { diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 30d2ba2e9..38ae7a462 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -37,12 +37,8 @@ private: } inner; }; - /// value used to reduce the native clocks accuracy as some apss rely on - /// undefined behavior where the level of accuracy in the clock shouldn't - /// be higher. - static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); - TimePoint time_point; + // factors u64 clock_rtsc_factor{}; u64 cpu_rtsc_factor{}; |