diff options
36 files changed, 790 insertions, 293 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index a6fa9a85d..e03fffd8d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -180,7 +180,6 @@ add_library(common STATIC thread.cpp thread.h thread_queue_list.h - thread_worker.cpp thread_worker.h threadsafe_queue.h time_zone.cpp @@ -188,6 +187,7 @@ add_library(common STATIC tiny_mt.h tree.h uint128.h + unique_function.h uuid.cpp uuid.h vector_math.h diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 0061e29cc..e1973af85 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -93,7 +93,7 @@ bool IsGPULevelHigh() { } bool IsFastmemEnabled() { - if (values.cpu_accuracy.GetValue() == CPUAccuracy::DebugMode) { + if (values.cpu_debug_mode) { return static_cast<bool>(values.cpuopt_fastmem); } return true; diff --git a/src/common/settings.h b/src/common/settings.h index 1d6f196c6..71d0f864f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -31,9 +31,9 @@ enum class GPUAccuracy : u32 { }; enum class CPUAccuracy : u32 { - Accurate = 0, - Unsafe = 1, - DebugMode = 2, + Auto = 0, + Accurate = 1, + Unsafe = 2, }; /** The BasicSetting class is a simple resource manager. It defines a label and default value @@ -284,7 +284,10 @@ struct Values { Setting<bool> use_multi_core{true, "use_multi_core"}; // Cpu - Setting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Accurate, "cpu_accuracy"}; + Setting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, "cpu_accuracy"}; + // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021 + BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"}; + BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"}; BasicSetting<bool> cpuopt_page_tables{true, "cpuopt_page_tables"}; BasicSetting<bool> cpuopt_block_linking{true, "cpuopt_block_linking"}; diff --git a/src/common/thread_worker.cpp b/src/common/thread_worker.cpp deleted file mode 100644 index 8f9bf447a..000000000 --- a/src/common/thread_worker.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2020 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/thread.h" -#include "common/thread_worker.h" - -namespace Common { - -ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) { - for (std::size_t i = 0; i < num_workers; ++i) - threads.emplace_back([this, thread_name{std::string{name}}] { - Common::SetCurrentThreadName(thread_name.c_str()); - - // Wait for first request - { - std::unique_lock lock{queue_mutex}; - condition.wait(lock, [this] { return stop || !requests.empty(); }); - } - - while (true) { - std::function<void()> task; - - { - std::unique_lock lock{queue_mutex}; - condition.wait(lock, [this] { return stop || !requests.empty(); }); - if (stop || requests.empty()) { - return; - } - task = std::move(requests.front()); - requests.pop(); - } - - task(); - } - }); -} - -ThreadWorker::~ThreadWorker() { - { - std::unique_lock lock{queue_mutex}; - stop = true; - } - condition.notify_all(); - for (std::thread& thread : threads) { - thread.join(); - } -} - -void ThreadWorker::QueueWork(std::function<void()>&& work) { - { - std::unique_lock lock{queue_mutex}; - requests.emplace(work); - } - condition.notify_one(); -} - -} // namespace Common diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index f1859971f..8272985ff 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -7,24 +7,110 @@ #include <atomic> #include <functional> #include <mutex> +#include <stop_token> #include <string> +#include <thread> +#include <type_traits> #include <vector> #include <queue> +#include "common/thread.h" +#include "common/unique_function.h" + namespace Common { -class ThreadWorker final { +template <class StateType = void> +class StatefulThreadWorker { + static constexpr bool with_state = !std::is_same_v<StateType, void>; + + struct DummyCallable { + int operator()() const noexcept { + return 0; + } + }; + + using Task = + std::conditional_t<with_state, UniqueFunction<void, StateType*>, UniqueFunction<void>>; + using StateMaker = std::conditional_t<with_state, std::function<StateType()>, DummyCallable>; + public: - explicit ThreadWorker(std::size_t num_workers, const std::string& name); - ~ThreadWorker(); - void QueueWork(std::function<void()>&& work); + explicit StatefulThreadWorker(size_t num_workers, std::string name, StateMaker func = {}) + : workers_queued{num_workers}, thread_name{std::move(name)} { + const auto lambda = [this, func](std::stop_token stop_token) { + Common::SetCurrentThreadName(thread_name.c_str()); + { + std::conditional_t<with_state, StateType, int> state{func()}; + while (!stop_token.stop_requested()) { + Task task; + { + std::unique_lock lock{queue_mutex}; + if (requests.empty()) { + wait_condition.notify_all(); + } + condition.wait(lock, stop_token, [this] { return !requests.empty(); }); + if (stop_token.stop_requested()) { + break; + } + task = std::move(requests.front()); + requests.pop(); + } + if constexpr (with_state) { + task(&state); + } else { + task(); + } + ++work_done; + } + } + ++workers_stopped; + wait_condition.notify_all(); + }; + threads.reserve(num_workers); + for (size_t i = 0; i < num_workers; ++i) { + threads.emplace_back(lambda); + } + } + + StatefulThreadWorker& operator=(const StatefulThreadWorker&) = delete; + StatefulThreadWorker(const StatefulThreadWorker&) = delete; + + StatefulThreadWorker& operator=(StatefulThreadWorker&&) = delete; + StatefulThreadWorker(StatefulThreadWorker&&) = delete; + + void QueueWork(Task work) { + { + std::unique_lock lock{queue_mutex}; + requests.emplace(std::move(work)); + ++work_scheduled; + } + condition.notify_one(); + } + + void WaitForRequests(std::stop_token stop_token = {}) { + std::stop_callback callback(stop_token, [this] { + for (auto& thread : threads) { + thread.request_stop(); + } + }); + std::unique_lock lock{queue_mutex}; + wait_condition.wait(lock, [this] { + return workers_stopped >= workers_queued || work_done >= work_scheduled; + }); + } private: - std::vector<std::thread> threads; - std::queue<std::function<void()>> requests; + std::queue<Task> requests; std::mutex queue_mutex; - std::condition_variable condition; - std::atomic_bool stop{}; + std::condition_variable_any condition; + std::condition_variable wait_condition; + std::atomic<size_t> work_scheduled{}; + std::atomic<size_t> work_done{}; + std::atomic<size_t> workers_stopped{}; + std::atomic<size_t> workers_queued{}; + std::string thread_name; + std::vector<std::jthread> threads; }; +using ThreadWorker = StatefulThreadWorker<>; + } // namespace Common diff --git a/src/common/unique_function.h b/src/common/unique_function.h new file mode 100644 index 000000000..ca0559071 --- /dev/null +++ b/src/common/unique_function.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <utility> + +namespace Common { + +/// General purpose function wrapper similar to std::function. +/// Unlike std::function, the captured values don't have to be copyable. +/// This class can be moved but not copied. +template <typename ResultType, typename... Args> +class UniqueFunction { + class CallableBase { + public: + virtual ~CallableBase() = default; + virtual ResultType operator()(Args&&...) = 0; + }; + + template <typename Functor> + class Callable final : public CallableBase { + public: + Callable(Functor&& functor_) : functor{std::move(functor_)} {} + ~Callable() override = default; + + ResultType operator()(Args&&... args) override { + return functor(std::forward<Args>(args)...); + } + + private: + Functor functor; + }; + +public: + UniqueFunction() = default; + + template <typename Functor> + UniqueFunction(Functor&& functor) + : callable{std::make_unique<Callable<Functor>>(std::move(functor))} {} + + UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default; + UniqueFunction(UniqueFunction&& rhs) noexcept = default; + + UniqueFunction& operator=(const UniqueFunction&) = delete; + UniqueFunction(const UniqueFunction&) = delete; + + ResultType operator()(Args&&... args) const { + return (*callable)(std::forward<Args>(args)...); + } + + explicit operator bool() const noexcept { + return static_cast<bool>(callable); + } + +private: + std::unique_ptr<CallableBase> callable; +}; + +} // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 77a44f862..b0d89c539 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -150,7 +150,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.far_code_offset = 400_MiB; // Safe optimizations - if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { + if (Settings::values.cpu_debug_mode) { if (!Settings::values.cpuopt_page_tables) { config.page_table = nullptr; } @@ -183,20 +183,28 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* // Unsafe optimizations if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { config.unsafe_optimizations = true; - if (Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()) { + if (Settings::values.cpuopt_unsafe_unfuse_fma) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; } - if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) { + if (Settings::values.cpuopt_unsafe_reduce_fp_error) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } - if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue()) { + if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; } - if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } } + // Curated optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) { + config.unsafe_optimizations = true; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } + return std::make_unique<Dynarmic::A32::Jit>(config); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 75332e348..bf27ffe71 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -190,7 +190,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.far_code_offset = 400_MiB; // Safe optimizations - if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { + if (Settings::values.cpu_debug_mode) { if (!Settings::values.cpuopt_page_tables) { config.page_table = nullptr; } @@ -223,20 +223,28 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* // Unsafe optimizations if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { config.unsafe_optimizations = true; - if (Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()) { + if (Settings::values.cpuopt_unsafe_unfuse_fma) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; } - if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) { + if (Settings::values.cpuopt_unsafe_reduce_fp_error) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } - if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } - if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) { + if (Settings::values.cpuopt_unsafe_fastmem_check) { config.fastmem_address_space_bits = 64; } } + // Curated optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) { + config.unsafe_optimizations = true; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + config.fastmem_address_space_bits = 64; + } + return std::make_shared<Dynarmic::A64::Jit>(config); } diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 96bc30cac..c4c012f3d 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -5,6 +5,7 @@ add_executable(tests common/host_memory.cpp common/param_package.cpp common/ring_buffer.cpp + common/unique_function.cpp core/core_timing.cpp core/network/network.cpp tests.cpp diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp new file mode 100644 index 000000000..ac9912738 --- /dev/null +++ b/src/tests/common/unique_function.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <catch2/catch.hpp> + +#include "common/unique_function.h" + +namespace { +struct Noisy { + Noisy() : state{"Default constructed"} {} + Noisy(Noisy&& rhs) noexcept : state{"Move constructed"} { + rhs.state = "Moved away"; + } + Noisy& operator=(Noisy&& rhs) noexcept { + state = "Move assigned"; + rhs.state = "Moved away"; + } + Noisy(const Noisy&) : state{"Copied constructed"} {} + Noisy& operator=(const Noisy&) { + state = "Copied assigned"; + } + + std::string state; +}; +} // Anonymous namespace + +TEST_CASE("UniqueFunction", "[common]") { + SECTION("Capture reference") { + int value = 0; + Common::UniqueFunction<void> func = [&value] { value = 5; }; + func(); + REQUIRE(value == 5); + } + SECTION("Capture pointer") { + int value = 0; + int* pointer = &value; + Common::UniqueFunction<void> func = [pointer] { *pointer = 5; }; + func(); + REQUIRE(value == 5); + } + SECTION("Move object") { + Noisy noisy; + REQUIRE(noisy.state == "Default constructed"); + + Common::UniqueFunction<void> func = [noisy = std::move(noisy)] { + REQUIRE(noisy.state == "Move constructed"); + }; + REQUIRE(noisy.state == "Moved away"); + func(); + } + SECTION("Move construct function") { + int value = 0; + Common::UniqueFunction<void> func = [&value] { value = 5; }; + Common::UniqueFunction<void> new_func = std::move(func); + new_func(); + REQUIRE(value == 5); + } + SECTION("Move assign function") { + int value = 0; + Common::UniqueFunction<void> func = [&value] { value = 5; }; + Common::UniqueFunction<void> new_func; + new_func = std::move(func); + new_func(); + REQUIRE(value == 5); + } + SECTION("Default construct then assign function") { + int value = 0; + Common::UniqueFunction<void> func; + func = [&value] { value = 5; }; + func(); + REQUIRE(value == 5); + } + SECTION("Pass arguments") { + int result = 0; + Common::UniqueFunction<void, int, int> func = [&result](int a, int b) { result = a + b; }; + func(5, 4); + REQUIRE(result == 9); + } + SECTION("Pass arguments and return value") { + Common::UniqueFunction<int, int, int> func = [](int a, int b) { return a + b; }; + REQUIRE(func(5, 4) == 9); + } + SECTION("Destructor") { + int num_destroyed = 0; + struct Foo { + Foo(int* num_) : num{num_} {} + Foo(Foo&& rhs) : num{std::exchange(rhs.num, nullptr)} {} + Foo(const Foo&) = delete; + + ~Foo() { + if (num) { + ++*num; + } + } + + int* num = nullptr; + }; + Foo object{&num_destroyed}; + { + Common::UniqueFunction<void> func = [object = std::move(object)] {}; + REQUIRE(num_destroyed == 0); + } + REQUIRE(num_destroyed == 1); + } +} diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index edced69bb..9f5a54de4 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp @@ -536,7 +536,7 @@ TEST_CASE("BufferBase: Cached write downloads") { REQUIRE(rasterizer.Count() == 63); buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); int num = 0; - buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); + buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); REQUIRE(num == 0); REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index b121d36a3..c3318095c 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -226,19 +226,24 @@ public: /// Call 'func' for each CPU modified range and unmark those pages as CPU modified template <typename Func> void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { - ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func); + ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified template <typename Func> - void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { - ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func); + void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) { + ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func); + } + + template <typename Func> + void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) { + ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified template <typename Func> void ForEachDownloadRange(Func&& func) { - ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func); + ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func); } /// Mark buffer as picked @@ -415,7 +420,7 @@ private: * @param func Function to call for each turned off region */ template <Type type, typename Func> - void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) { static_assert(type != Type::Untracked); const s64 difference = query_cpu_range - cpu_addr; @@ -467,7 +472,9 @@ private: bits = (bits << left_offset) >> left_offset; const u64 current_word = state_words[word_index] & bits; - state_words[word_index] &= ~bits; + if (clear) { + state_words[word_index] &= ~bits; + } if constexpr (type == Type::CPU) { const u64 current_bits = untracked_words[word_index] & bits; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index cad7f902d..502feddba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -15,6 +15,7 @@ #include <vector> #include <boost/container/small_vector.hpp> +#include <boost/icl/interval_set.hpp> #include "common/common_types.h" #include "common/div_ceil.h" @@ -77,6 +78,9 @@ class BufferCache { using Runtime = typename P::Runtime; using Buffer = typename P::Buffer; + using IntervalSet = boost::icl::interval_set<VAddr>; + using IntervalType = typename IntervalSet::interval_type; + struct Empty {}; struct OverlapResult { @@ -148,11 +152,14 @@ public: /// Return true when there are uncommitted buffers to be downloaded [[nodiscard]] bool HasUncommittedFlushes() const noexcept; + void AccumulateFlushes(); + /// Return true when the caller should wait for async downloads [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; /// Commit asynchronous downloads void CommitAsyncFlushes(); + void CommitAsyncFlushesHigh(); /// Pop asynchronous downloads void PopAsyncFlushes(); @@ -160,6 +167,9 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + /// Return true when a CPU region is modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + std::mutex mutex; private: @@ -272,8 +282,6 @@ private: void DeleteBuffer(BufferId buffer_id); - void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); - void NotifyBufferDeletion(); [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; @@ -327,9 +335,9 @@ private: std::vector<BufferId> cached_write_buffer_ids; - // TODO: This data structure is not optimal and it should be reworked - std::vector<BufferId> uncommitted_downloads; - std::deque<std::vector<BufferId>> committed_downloads; + IntervalSet uncommitted_ranges; + IntervalSet common_ranges; + std::deque<IntervalSet> committed_ranges; size_t immediate_buffer_capacity = 0; std::unique_ptr<u8[]> immediate_buffer_alloc; @@ -352,6 +360,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); + common_ranges.clear(); } template <class P> @@ -547,29 +556,30 @@ void BufferCache<P>::FlushCachedWrites() { template <class P> bool BufferCache<P>::HasUncommittedFlushes() const noexcept { - return !uncommitted_downloads.empty(); + return !uncommitted_ranges.empty() || !committed_ranges.empty(); } template <class P> -bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { - return !committed_downloads.empty() && !committed_downloads.front().empty(); +void BufferCache<P>::AccumulateFlushes() { + if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { + uncommitted_ranges.clear(); + return; + } + if (uncommitted_ranges.empty()) { + return; + } + committed_ranges.emplace_back(std::move(uncommitted_ranges)); } template <class P> -void BufferCache<P>::CommitAsyncFlushes() { - // This is intentionally passing the value by copy - committed_downloads.push_front(uncommitted_downloads); - uncommitted_downloads.clear(); +bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { + return false; } template <class P> -void BufferCache<P>::PopAsyncFlushes() { - if (committed_downloads.empty()) { - return; - } - auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); }); - const std::span<const BufferId> download_ids = committed_downloads.back(); - if (download_ids.empty()) { +void BufferCache<P>::CommitAsyncFlushesHigh() { + AccumulateFlushes(); + if (committed_ranges.empty()) { return; } MICROPROFILE_SCOPE(GPU_DownloadMemory); @@ -577,20 +587,66 @@ void BufferCache<P>::PopAsyncFlushes() { boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; u64 total_size_bytes = 0; u64 largest_copy = 0; - for (const BufferId buffer_id : download_ids) { - slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) { - downloads.push_back({ - BufferCopy{ - .src_offset = range_offset, - .dst_offset = total_size_bytes, - .size = range_size, - }, - buffer_id, + for (const IntervalSet& intervals : committed_ranges) { + for (auto& interval : intervals) { + const std::size_t size = interval.upper() - interval.lower(); + const VAddr cpu_addr = interval.lower(); + ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { + boost::container::small_vector<BufferCopy, 1> copies; + buffer.ForEachDownloadRangeAndClear( + cpu_addr, size, [&](u64 range_offset, u64 range_size) { + const VAddr buffer_addr = buffer.CpuAddr(); + const auto add_download = [&](VAddr start, VAddr end) { + const u64 new_offset = start - buffer_addr; + const u64 new_size = end - start; + downloads.push_back({ + BufferCopy{ + .src_offset = new_offset, + .dst_offset = total_size_bytes, + .size = new_size, + }, + buffer_id, + }); + // Align up to avoid cache conflicts + constexpr u64 align = 256ULL; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; + largest_copy = std::max(largest_copy, new_size); + }; + + const VAddr start_address = buffer_addr + range_offset; + const VAddr end_address = start_address + range_size; + const IntervalType search_interval{cpu_addr, 1}; + auto it = common_ranges.lower_bound(search_interval); + if (it == common_ranges.end()) { + it = common_ranges.begin(); + } + while (it != common_ranges.end()) { + VAddr inter_addr_end = it->upper(); + VAddr inter_addr = it->lower(); + if (inter_addr >= end_address) { + break; + } + if (inter_addr_end <= start_address) { + it++; + continue; + } + if (inter_addr_end > end_address) { + inter_addr_end = end_address; + } + if (inter_addr < start_address) { + inter_addr = start_address; + } + add_download(inter_addr, inter_addr_end); + it++; + } + const IntervalType subtract_interval{start_address, end_address}; + common_ranges.subtract(subtract_interval); + }); }); - total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); - }); + } } + committed_ranges.clear(); if (downloads.empty()) { return; } @@ -623,6 +679,19 @@ void BufferCache<P>::PopAsyncFlushes() { } template <class P> +void BufferCache<P>::CommitAsyncFlushes() { + if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { + CommitAsyncFlushesHigh(); + } else { + uncommitted_ranges.clear(); + committed_ranges.clear(); + } +} + +template <class P> +void BufferCache<P>::PopAsyncFlushes() {} + +template <class P> bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); for (u64 page = addr >> PAGE_BITS; page < page_end;) { @@ -642,6 +711,25 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { } template <class P> +bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { + const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); + for (u64 page = addr >> PAGE_BITS; page < page_end;) { + const BufferId image_id = page_table[page]; + if (!image_id) { + ++page; + continue; + } + Buffer& buffer = slot_buffers[image_id]; + if (buffer.IsRegionCpuModified(addr, size)) { + return true; + } + const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); + page = Common::DivCeil(end_addr, PAGE_SIZE); + } + return false; +} + +template <class P> void BufferCache<P>::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; TouchBuffer(buffer); @@ -1010,16 +1098,16 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s Buffer& buffer = slot_buffers[buffer_id]; buffer.MarkRegionAsGpuModified(cpu_addr, size); - const bool is_accuracy_high = Settings::IsGPULevelHigh(); + const IntervalType base_interval{cpu_addr, cpu_addr + size}; + common_ranges.add(base_interval); + + const bool is_accuracy_high = + Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); - if (!is_accuracy_high || !is_async) { - return; - } - if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) { - // Already inserted + if (!is_async && !is_accuracy_high) { return; } - uncommitted_downloads.push_back(buffer_id); + uncommitted_ranges.add(base_interval); } template <class P> @@ -1103,7 +1191,6 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, if (!copies.empty()) { runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); } - ReplaceBufferDownloads(overlap_id, new_buffer_id); DeleteBuffer(overlap_id); } @@ -1244,14 +1331,51 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si boost::container::small_vector<BufferCopy, 1> copies; u64 total_size_bytes = 0; u64 largest_copy = 0; - buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { - copies.push_back(BufferCopy{ - .src_offset = range_offset, - .dst_offset = total_size_bytes, - .size = range_size, - }); - total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); + buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) { + const VAddr buffer_addr = buffer.CpuAddr(); + const auto add_download = [&](VAddr start, VAddr end) { + const u64 new_offset = start - buffer_addr; + const u64 new_size = end - start; + copies.push_back(BufferCopy{ + .src_offset = new_offset, + .dst_offset = total_size_bytes, + .size = new_size, + }); + // Align up to avoid cache conflicts + constexpr u64 align = 256ULL; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; + largest_copy = std::max(largest_copy, new_size); + }; + + const VAddr start_address = buffer_addr + range_offset; + const VAddr end_address = start_address + range_size; + const IntervalType search_interval{start_address - range_size, 1}; + auto it = common_ranges.lower_bound(search_interval); + if (it == common_ranges.end()) { + it = common_ranges.begin(); + } + while (it != common_ranges.end()) { + VAddr inter_addr_end = it->upper(); + VAddr inter_addr = it->lower(); + if (inter_addr >= end_address) { + break; + } + if (inter_addr_end <= start_address) { + it++; + continue; + } + if (inter_addr_end > end_address) { + inter_addr_end = end_address; + } + if (inter_addr < start_address) { + inter_addr = start_address; + } + add_download(inter_addr, inter_addr_end); + it++; + } + const IntervalType subtract_interval{start_address, end_address}; + common_ranges.subtract(subtract_interval); }); if (total_size_bytes == 0) { return; @@ -1316,18 +1440,6 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { } template <class P> -void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) { - const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) { - std::ranges::replace(buffers, old_buffer_id, new_buffer_id); - if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) { - buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end()); - } - }; - replace(uncommitted_downloads); - std::ranges::for_each(committed_downloads, replace); -} - -template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); @@ -1349,15 +1461,9 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s if (!cpu_addr || size == 0) { return NULL_BINDING; } - // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range. - // It exists due to some games like Astral Chain operate out of bounds. - // Binding the whole map range would be technically correct, but games have large maps that make - // this approach unaffordable for now. - static constexpr u32 arbitrary_extra_bytes = 0xc000; - const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr)); const Binding binding{ .cpu_addr = *cpu_addr, - .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end), + .size = size, .buffer_id = BufferId{}, }; return binding; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 8b33c04ab..8d28bd884 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -4,6 +4,7 @@ #include "common/cityhash.h" #include "common/microprofile.h" +#include "common/settings.h" #include "core/core.h" #include "core/memory.h" #include "video_core/dma_pusher.h" @@ -76,8 +77,13 @@ bool DmaPusher::Step() { // Push buffer non-empty, read a word command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + if (Settings::IsGPULevelHigh()) { + gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + } else { + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + } } for (std::size_t index = 0; index < command_headers.size();) { const CommandHeader& command_header = command_headers[index]; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index f055b61e9..34dc6c596 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -8,6 +8,7 @@ #include <queue> #include "common/common_types.h" +#include "common/settings.h" #include "core/core.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/gpu.h" @@ -53,6 +54,12 @@ public: delayed_destruction_ring.Tick(); } + // Unlike other fences, this one doesn't + void SignalOrdering() { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.AccumulateFlushes(); + } + void SignalSemaphore(GPUVAddr addr, u32 value) { TryReleasePendingFences(); const bool should_flush = ShouldFlush(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 35cc561be..f317ddc2b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -268,11 +268,13 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::SemaphoreAddressHigh: case BufferMethods::SemaphoreAddressLow: case BufferMethods::SemaphoreSequence: - case BufferMethods::RefCnt: case BufferMethods::UnkCacheFlush: case BufferMethods::WrcacheFlush: case BufferMethods::FenceValue: break; + case BufferMethods::RefCnt: + rasterizer->SignalReference(); + break; case BufferMethods::FenceAction: ProcessFenceActionMethod(); break; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0cec4225b..67aef6000 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -63,6 +63,9 @@ public: /// Signal a GPU based syncpoint as a fence virtual void SignalSyncPoint(u32 value) = 0; + /// Signal a GPU based reference as point + virtual void SignalReference() = 0; + /// Release all pending fences. virtual void ReleaseFences() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 07ad0e205..a4ed8f68f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -634,6 +634,13 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { fence_manager.SignalSyncPoint(value); } +void RasterizerOpenGL::SignalReference() { + if (!gpu.IsAsync()) { + return; + } + fence_manager.SignalOrdering(); +} + void RasterizerOpenGL::ReleaseFences() { if (!gpu.IsAsync()) { return; @@ -650,6 +657,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { void RasterizerOpenGL::WaitForIdle() { glMemoryBarrier(GL_ALL_BARRIER_BITS); + SignalReference(); } void RasterizerOpenGL::FragmentBarrier() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 482efed7a..d8df71962 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -83,6 +83,7 @@ public: void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; + void SignalReference() override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bd4d649cc..9ea4b6653 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -580,6 +580,13 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { fence_manager.SignalSyncPoint(value); } +void RasterizerVulkan::SignalReference() { + if (!gpu.IsAsync()) { + return; + } + fence_manager.SignalOrdering(); +} + void RasterizerVulkan::ReleaseFences() { if (!gpu.IsAsync()) { return; @@ -612,6 +619,7 @@ void RasterizerVulkan::WaitForIdle() { cmdbuf.SetEvent(event, flags); cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); }); + SignalReference(); } void RasterizerVulkan::FragmentBarrier() { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 41459c5c5..5450ccfb5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -75,6 +75,7 @@ public: void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; + void SignalReference() override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 9fbdc1ac6..47a11cb2f 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -133,8 +133,8 @@ struct BufferImageCopy { }; struct BufferCopy { - size_t src_offset; - size_t dst_offset; + u64 src_offset; + u64 dst_offset; size_t size; }; diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 634fe66a5..f870b33b1 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -52,6 +52,9 @@ add_executable(yuzu configuration/configure_debug_controller.cpp configuration/configure_debug_controller.h configuration/configure_debug_controller.ui + configuration/configure_debug_tab.cpp + configuration/configure_debug_tab.h + configuration/configure_debug_tab.ui configuration/configure_dialog.cpp configuration/configure_dialog.h configuration/configure_filesystem.cpp diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index d652ee6ae..8c71ad5c1 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -793,7 +793,13 @@ void Config::ReadPathValues() { void Config::ReadCpuValues() { qt_config->beginGroup(QStringLiteral("Cpu")); - ReadGlobalSetting(Settings::values.cpu_accuracy); + ReadBasicSetting(Settings::values.cpu_accuracy_first_time); + if (Settings::values.cpu_accuracy_first_time) { + Settings::values.cpu_accuracy.SetValue(Settings::values.cpu_accuracy.GetDefault()); + Settings::values.cpu_accuracy_first_time.SetValue(false); + } else { + ReadGlobalSetting(Settings::values.cpu_accuracy); + } ReadGlobalSetting(Settings::values.cpuopt_unsafe_unfuse_fma); ReadGlobalSetting(Settings::values.cpuopt_unsafe_reduce_fp_error); @@ -802,6 +808,7 @@ void Config::ReadCpuValues() { ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); if (global) { + ReadBasicSetting(Settings::values.cpu_debug_mode); ReadBasicSetting(Settings::values.cpuopt_page_tables); ReadBasicSetting(Settings::values.cpuopt_block_linking); ReadBasicSetting(Settings::values.cpuopt_return_stack_buffer); @@ -820,7 +827,6 @@ void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); ReadGlobalSetting(Settings::values.renderer_backend); - ReadBasicSetting(Settings::values.renderer_debug); ReadGlobalSetting(Settings::values.vulkan_device); ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); @@ -841,6 +847,10 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); + if (global) { + ReadBasicSetting(Settings::values.renderer_debug); + } + qt_config->endGroup(); } @@ -1308,6 +1318,7 @@ void Config::SavePathValues() { void Config::SaveCpuValues() { qt_config->beginGroup(QStringLiteral("Cpu")); + WriteBasicSetting(Settings::values.cpu_accuracy_first_time); WriteSetting(QStringLiteral("cpu_accuracy"), static_cast<u32>(Settings::values.cpu_accuracy.GetValue(global)), static_cast<u32>(Settings::values.cpu_accuracy.GetDefault()), @@ -1320,6 +1331,7 @@ void Config::SaveCpuValues() { WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); if (global) { + WriteBasicSetting(Settings::values.cpu_debug_mode); WriteBasicSetting(Settings::values.cpuopt_page_tables); WriteBasicSetting(Settings::values.cpuopt_block_linking); WriteBasicSetting(Settings::values.cpuopt_return_stack_buffer); @@ -1341,7 +1353,6 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), static_cast<u32>(Settings::values.renderer_backend.GetDefault()), Settings::values.renderer_backend.UsingGlobal()); - WriteBasicSetting(Settings::values.renderer_debug); WriteGlobalSetting(Settings::values.vulkan_device); WriteGlobalSetting(Settings::values.fullscreen_mode); WriteGlobalSetting(Settings::values.aspect_ratio); @@ -1365,6 +1376,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); + if (global) { + WriteBasicSetting(Settings::values.renderer_debug); + } + qt_config->endGroup(); } diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui index f92c3aff3..fca9aed5f 100644 --- a/src/yuzu/configuration/configure.ui +++ b/src/yuzu/configuration/configure.ui @@ -41,7 +41,7 @@ <item> <widget class="QTabWidget" name="tabWidget"> <property name="currentIndex"> - <number>0</number> + <number>11</number> </property> <widget class="ConfigureGeneral" name="generalTab"> <property name="accessibleName"> @@ -107,14 +107,6 @@ <string>CPU</string> </attribute> </widget> - <widget class="ConfigureCpuDebug" name="cpuDebugTab"> - <property name="accessibleName"> - <string>Debug</string> - </property> - <attribute name="title"> - <string>Debug</string> - </attribute> - </widget> <widget class="ConfigureGraphics" name="graphicsTab"> <property name="accessibleName"> <string>Graphics</string> @@ -139,7 +131,7 @@ <string>Audio</string> </attribute> </widget> - <widget class="ConfigureDebug" name="debugTab"> + <widget class="ConfigureDebugTab" name="debugTab"> <property name="accessibleName"> <string>Debug</string> </property> @@ -208,24 +200,12 @@ <container>1</container> </customwidget> <customwidget> - <class>ConfigureDebug</class> - <extends>QWidget</extends> - <header>configuration/configure_debug.h</header> - <container>1</container> - </customwidget> - <customwidget> <class>ConfigureCpu</class> <extends>QWidget</extends> <header>configuration/configure_cpu.h</header> <container>1</container> </customwidget> <customwidget> - <class>ConfigureCpuDebug</class> - <extends>QWidget</extends> - <header>configuration/configure_cpu_debug.h</header> - <container>1</container> - </customwidget> - <customwidget> <class>ConfigureGraphics</class> <extends>QWidget</extends> <header>configuration/configure_graphics.h</header> @@ -267,6 +247,12 @@ <header>configuration/configure_service.h</header> <container>1</container> </customwidget> + <customwidget> + <class>ConfigureDebugTab</class> + <extends>QWidget</extends> + <header>configuration/configure_debug_tab.h</header> + <container>1</container> + </customwidget> </customwidgets> <resources/> <connections> @@ -275,12 +261,32 @@ <signal>accepted()</signal> <receiver>ConfigureDialog</receiver> <slot>accept()</slot> + <hints> + <hint type="sourcelabel"> + <x>20</x> + <y>20</y> + </hint> + <hint type="destinationlabel"> + <x>20</x> + <y>20</y> + </hint> + </hints> </connection> <connection> <sender>buttonBox</sender> <signal>rejected()</signal> <receiver>ConfigureDialog</receiver> <slot>reject()</slot> + <hints> + <hint type="sourcelabel"> + <x>20</x> + <y>20</y> + </hint> + <hint type="destinationlabel"> + <x>20</x> + <y>20</y> + </hint> + </hints> </connection> </connections> </ui> diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 13db2ba98..8d7171487 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -20,8 +20,6 @@ ConfigureCpu::ConfigureCpu(QWidget* parent) : QWidget(parent), ui(new Ui::Config SetConfiguration(); - connect(ui->accuracy, qOverload<int>(&QComboBox::activated), this, - &ConfigureCpu::AccuracyUpdated); connect(ui->accuracy, qOverload<int>(&QComboBox::currentIndexChanged), this, &ConfigureCpu::UpdateGroup); } @@ -58,20 +56,6 @@ void ConfigureCpu::SetConfiguration() { UpdateGroup(ui->accuracy->currentIndex()); } -void ConfigureCpu::AccuracyUpdated(int index) { - if (Settings::IsConfiguringGlobal() && - static_cast<Settings::CPUAccuracy>(index) == Settings::CPUAccuracy::DebugMode) { - const auto result = QMessageBox::warning(this, tr("Setting CPU to Debug Mode"), - tr("CPU Debug Mode is only intended for developer " - "use. Are you sure you want to enable this?"), - QMessageBox::Yes | QMessageBox::No); - if (result == QMessageBox::No) { - ui->accuracy->setCurrentIndex(static_cast<int>(Settings::CPUAccuracy::Accurate)); - UpdateGroup(static_cast<int>(Settings::CPUAccuracy::Accurate)); - } - } -} - void ConfigureCpu::UpdateGroup(int index) { if (!Settings::IsConfiguringGlobal()) { index -= ConfigurationShared::USE_GLOBAL_OFFSET; @@ -134,8 +118,6 @@ void ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredComboBox( ui->accuracy, ui->widget_accuracy, static_cast<u32>(Settings::values.cpu_accuracy.GetValue(true))); - ui->accuracy->removeItem(static_cast<u32>(Settings::CPUAccuracy::DebugMode) + - ConfigurationShared::USE_GLOBAL_OFFSET); ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_unfuse_fma, Settings::values.cpuopt_unsafe_unfuse_fma, diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index b2b5f1671..154931482 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -29,7 +29,6 @@ private: void changeEvent(QEvent* event) override; void RetranslateUI(); - void AccuracyUpdated(int index); void UpdateGroup(int index); void SetConfiguration(); diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index 0e296d4e5..5b9457faf 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -6,8 +6,8 @@ <rect> <x>0</x> <y>0</y> - <width>400</width> - <height>321</height> + <width>448</width> + <height>433</height> </rect> </property> <property name="windowTitle"> @@ -17,7 +17,7 @@ <item> <layout class="QVBoxLayout"> <item> - <widget class="QGroupBox"> + <widget class="QGroupBox" name="groupBox"> <property name="title"> <string>General</string> </property> @@ -36,17 +36,17 @@ <widget class="QComboBox" name="accuracy"> <item> <property name="text"> - <string>Accurate</string> + <string>Auto</string> </property> </item> <item> <property name="text"> - <string>Unsafe</string> + <string>Accurate</string> </property> </item> <item> <property name="text"> - <string>Enable Debug Mode</string> + <string>Unsafe</string> </property> </item> </widget> @@ -57,7 +57,7 @@ <item> <widget class="QLabel" name="label_recommended_accuracy"> <property name="text"> - <string>We recommend setting accuracy to "Accurate".</string> + <string>We recommend setting accuracy to "Auto".</string> </property> <property name="wordWrap"> <bool>false</bool> diff --git a/src/yuzu/configuration/configure_cpu_debug.h b/src/yuzu/configuration/configure_cpu_debug.h index 10de55099..1b0d8050c 100644 --- a/src/yuzu/configuration/configure_cpu_debug.h +++ b/src/yuzu/configuration/configure_cpu_debug.h @@ -6,7 +6,6 @@ #include <memory> #include <QWidget> -#include "common/settings.h" namespace Ui { class ConfigureCpuDebug; diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index c43f89a5a..abf469b55 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui @@ -6,8 +6,8 @@ <rect> <x>0</x> <y>0</y> - <width>400</width> - <height>321</height> + <width>592</width> + <height>503</height> </rect> </property> <property name="windowTitle"> @@ -17,140 +17,132 @@ <item> <layout class="QVBoxLayout"> <item> - <widget class="QGroupBox"> + <widget class="QGroupBox" name="groupBox"> <property name="title"> <string>Toggle CPU Optimizations</string> </property> <layout class="QVBoxLayout"> <item> - <widget class="QLabel"> - <property name="wordWrap"> - <bool>1</bool> - </property> + <widget class="QLabel" name="label"> <property name="text"> - <string> - <div> - <b>For debugging only.</b> - <br> - If you're not sure what these do, keep all of these enabled. - <br> - These settings, when disabled, only take effect when CPU Accuracy is "Debug Mode". - </div> - </string> + <string><html><head/><body><p><span style=" font-weight:600;">For debugging only.</span><br/>If you're not sure what these do, keep all of these enabled. <br/>These settings, when disabled, only take effect when CPU Debugging is enabled. </p></body></html></string> + </property> + <property name="wordWrap"> + <bool>false</bool> </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_page_tables"> - <property name="text"> - <string>Enable inline page tables</string> - </property> <property name="toolTip"> <string> - <div style="white-space: nowrap">This optimization speeds up memory accesses by the guest program.</div> - <div style="white-space: nowrap">Enabling it inlines accesses to PageTable::pointers into emitted code.</div> - <div style="white-space: nowrap">Disabling this forces all memory accesses to go through the Memory::Read/Memory::Write functions.</div> + <div style="white-space: nowrap">This optimization speeds up memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it inlines accesses to PageTable::pointers into emitted code.</div> + <div style="white-space: nowrap">Disabling this forces all memory accesses to go through the Memory::Read/Memory::Write functions.</div> </string> </property> + <property name="text"> + <string>Enable inline page tables</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_block_linking"> - <property name="text"> - <string>Enable block linking</string> - </property> <property name="toolTip"> <string> <div>This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump directly to other basic blocks if the destination PC is static.</div> </string> </property> + <property name="text"> + <string>Enable block linking</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_return_stack_buffer"> - <property name="text"> - <string>Enable return stack buffer</string> - </property> <property name="toolTip"> <string> <div>This optimization avoids dispatcher lookups by keeping track potential return addresses of BL instructions. This approximates what happens with a return stack buffer on a real CPU.</div> </string> </property> + <property name="text"> + <string>Enable return stack buffer</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_fast_dispatcher"> - <property name="text"> - <string>Enable fast dispatcher</string> - </property> <property name="toolTip"> <string> <div>Enable a two-tiered dispatch system. A faster dispatcher written in assembly has a small MRU cache of jump destinations is used first. If that fails, dispatch falls back to the slower C++ dispatcher.</div> </string> </property> + <property name="text"> + <string>Enable fast dispatcher</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_context_elimination"> - <property name="text"> - <string>Enable context elimination</string> - </property> <property name="toolTip"> <string> <div>Enables an IR optimization that reduces unnecessary accesses to the CPU context structure.</div> </string> </property> + <property name="text"> + <string>Enable context elimination</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_const_prop"> - <property name="text"> - <string>Enable constant propagation</string> - </property> <property name="toolTip"> <string> <div>Enables IR optimizations that involve constant propagation.</div> </string> </property> + <property name="text"> + <string>Enable constant propagation</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_misc_ir"> - <property name="text"> - <string>Enable miscellaneous optimizations</string> - </property> <property name="toolTip"> <string> <div>Enables miscellaneous IR optimizations.</div> </string> </property> + <property name="text"> + <string>Enable miscellaneous optimizations</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_reduce_misalign_checks"> - <property name="text"> - <string>Enable misalignment check reduction</string> - </property> <property name="toolTip"> <string> - <div style="white-space: nowrap">When enabled, a misalignment is only triggered when an access crosses a page boundary.</div> - <div style="white-space: nowrap">When disabled, a misalignment is triggered on all misaligned accesses.</div> + <div style="white-space: nowrap">When enabled, a misalignment is only triggered when an access crosses a page boundary.</div> + <div style="white-space: nowrap">When disabled, a misalignment is triggered on all misaligned accesses.</div> </string> </property> + <property name="text"> + <string>Enable misalignment check reduction</string> + </property> </widget> </item> <item> <widget class="QCheckBox" name="cpuopt_fastmem"> - <property name="text"> - <string>Enable Host MMU Emulation</string> - </property> <property name="toolTip"> <string> - <div style="white-space: nowrap">This optimization speeds up memory accesses by the guest program.</div> - <div style="white-space: nowrap">Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.</div> - <div style="white-space: nowrap">Disabling this forces all memory accesses to use Software MMU Emulation.</div> + <div style="white-space: nowrap">This optimization speeds up memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.</div> + <div style="white-space: nowrap">Disabling this forces all memory accesses to use Software MMU Emulation.</div> </string> </property> + <property name="text"> + <string>Enable Host MMU Emulation</string> + </property> </widget> </item> </layout> diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index cbe45a305..8fceb3878 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -43,6 +43,8 @@ void ConfigureDebug::SetConfiguration() { ui->use_auto_stub->setChecked(Settings::values.use_auto_stub.GetValue()); ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); + ui->enable_cpu_debugging->setEnabled(runtime_lock); + ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); @@ -58,6 +60,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_debug_asserts = ui->use_debug_asserts->isChecked(); Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); + Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index c8087542f..1260ad6f0 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -7,7 +7,7 @@ <x>0</x> <y>0</y> <width>400</width> - <height>486</height> + <height>777</height> </rect> </property> <property name="windowTitle"> @@ -192,34 +192,41 @@ </property> </widget> </item> - <item> - <widget class="QCheckBox" name="use_debug_asserts"> - <property name="text"> - <string>Enable Debug Asserts</string> - </property> - </widget> - </item> - <item> - <widget class="QCheckBox" name="use_auto_stub"> - <property name="text"> - <string>Enable Auto-Stub</string> - </property> - </widget> - </item> <item> - <widget class="QLabel" name="label_5"> - <property name="font"> - <font> - <italic>true</italic> - </font> - </property> - <property name="text"> - <string>This will be reset automatically when yuzu closes.</string> - </property> - <property name="indent"> - <number>20</number> - </property> - </widget> + <widget class="QCheckBox" name="enable_cpu_debugging"> + <property name="text"> + <string>Enable CPU Debugging</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="use_debug_asserts"> + <property name="text"> + <string>Enable Debug Asserts</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="use_auto_stub"> + <property name="text"> + <string>Enable Auto-Stub</string> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="label_5"> + <property name="font"> + <font> + <italic>true</italic> + </font> + </property> + <property name="text"> + <string>This will be reset automatically when yuzu closes.</string> + </property> + <property name="indent"> + <number>20</number> + </property> + </widget> </item> </layout> </widget> diff --git a/src/yuzu/configuration/configure_debug_tab.cpp b/src/yuzu/configuration/configure_debug_tab.cpp new file mode 100644 index 000000000..67d369249 --- /dev/null +++ b/src/yuzu/configuration/configure_debug_tab.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "ui_configure_debug_tab.h" +#include "yuzu/configuration/configure_debug_tab.h" + +ConfigureDebugTab::ConfigureDebugTab(QWidget* parent) + : QWidget(parent), ui(new Ui::ConfigureDebugTab) { + ui->setupUi(this); + + SetConfiguration(); +} + +ConfigureDebugTab::~ConfigureDebugTab() = default; + +void ConfigureDebugTab::ApplyConfiguration() { + ui->debugTab->ApplyConfiguration(); + ui->cpuDebugTab->ApplyConfiguration(); +} + +void ConfigureDebugTab::SetCurrentIndex(int index) { + ui->tabWidget->setCurrentIndex(index); +} + +void ConfigureDebugTab::changeEvent(QEvent* event) { + if (event->type() == QEvent::LanguageChange) { + RetranslateUI(); + } + + QWidget::changeEvent(event); +} + +void ConfigureDebugTab::RetranslateUI() { + ui->retranslateUi(this); +} + +void ConfigureDebugTab::SetConfiguration() {} diff --git a/src/yuzu/configuration/configure_debug_tab.h b/src/yuzu/configuration/configure_debug_tab.h new file mode 100644 index 000000000..0a96d43d0 --- /dev/null +++ b/src/yuzu/configuration/configure_debug_tab.h @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <QWidget> + +namespace Ui { +class ConfigureDebugTab; +} + +class ConfigureDebugTab : public QWidget { + Q_OBJECT + +public: + explicit ConfigureDebugTab(QWidget* parent = nullptr); + ~ConfigureDebugTab() override; + + void ApplyConfiguration(); + + void SetCurrentIndex(int index); + +private: + void changeEvent(QEvent* event) override; + void RetranslateUI(); + + void SetConfiguration(); + + std::unique_ptr<Ui::ConfigureDebugTab> ui; +}; diff --git a/src/yuzu/configuration/configure_debug_tab.ui b/src/yuzu/configuration/configure_debug_tab.ui new file mode 100644 index 000000000..7dc6dd704 --- /dev/null +++ b/src/yuzu/configuration/configure_debug_tab.ui @@ -0,0 +1,52 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>ConfigureDebugTab</class> + <widget class="QWidget" name="ConfigureDebugTab"> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>320</width> + <height>240</height> + </rect> + </property> + <property name="windowTitle"> + <string>Form</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout"> + <item> + <widget class="QTabWidget" name="tabWidget"> + <property name="currentIndex"> + <number>1</number> + </property> + <widget class="ConfigureDebug" name="debugTab"> + <attribute name="title"> + <string>General</string> + </attribute> + </widget> + <widget class="ConfigureCpuDebug" name="cpuDebugTab"> + <attribute name="title"> + <string>CPU</string> + </attribute> + </widget> + </widget> + </item> + </layout> + </widget> + <customwidgets> + <customwidget> + <class>ConfigureDebug</class> + <extends>QWidget</extends> + <header>configuration/configure_debug.h</header> + <container>1</container> + </customwidget> + <customwidget> + <class>ConfigureCpuDebug</class> + <extends>QWidget</extends> + <header>configuration/configure_cpu_debug.h</header> + <container>1</container> + </customwidget> + </customwidgets> + <resources/> + <connections/> +</ui> diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index 371bc01b1..bc009b6b3 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -8,6 +8,7 @@ #include <QListWidgetItem> #include <QPushButton> #include <QSignalBlocker> +#include <QTabWidget> #include "common/settings.h" #include "core/core.h" #include "ui_configure.h" @@ -32,6 +33,8 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry, SetConfiguration(); PopulateSelectionList(); + connect(ui->tabWidget, &QTabWidget::currentChanged, this, + [this]() { ui->debugTab->SetCurrentIndex(0); }); connect(ui->uiTab, &ConfigureUi::LanguageChanged, this, &ConfigureDialog::OnLanguageChanged); connect(ui->selectorList, &QListWidget::itemSelectionChanged, this, &ConfigureDialog::UpdateVisibleTabs); @@ -59,7 +62,6 @@ void ConfigureDialog::ApplyConfiguration() { ui->inputTab->ApplyConfiguration(); ui->hotkeysTab->ApplyConfiguration(registry); ui->cpuTab->ApplyConfiguration(); - ui->cpuDebugTab->ApplyConfiguration(); ui->graphicsTab->ApplyConfiguration(); ui->graphicsAdvancedTab->ApplyConfiguration(); ui->audioTab->ApplyConfiguration(); @@ -102,7 +104,7 @@ void ConfigureDialog::PopulateSelectionList() { const std::array<std::pair<QString, QList<QWidget*>>, 6> items{ {{tr("General"), {ui->generalTab, ui->hotkeysTab, ui->uiTab, ui->webTab, ui->debugTab}}, {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}}, - {tr("CPU"), {ui->cpuTab, ui->cpuDebugTab}}, + {tr("CPU"), {ui->cpuTab}}, {tr("Graphics"), {ui->graphicsTab, ui->graphicsAdvancedTab}}, {tr("Audio"), {ui->audioTab}}, {tr("Controls"), ui->inputTab->GetSubTabs()}}, |