diff options
Diffstat (limited to 'src')
334 files changed, 16132 insertions, 5567 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3a57356ab..1e977e8a8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,6 +62,10 @@ else() -Wno-unused-parameter ) + if (ARCHITECTURE_x86_64) + add_compile_options("-mcx16") + endif() + if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang) add_compile_options("-stdlib=libc++") endif() diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 50846a854..d64452617 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -180,11 +180,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector< // Copy output header UpdateDataHeader response_data{worker_params}; - std::vector<u8> output_params(response_data.total_size); if (behavior_info.IsElapsedFrameCountSupported()) { - response_data.frame_count = 0x10; - response_data.total_size += 0x10; + response_data.render_info = sizeof(RendererInfo); + response_data.total_size += sizeof(RendererInfo); } + + std::vector<u8> output_params(response_data.total_size); std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader)); // Copy output memory pool entries @@ -219,6 +220,17 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector< return Audren::ERR_INVALID_PARAMETERS; } + if (behavior_info.IsElapsedFrameCountSupported()) { + const std::size_t renderer_info_offset{ + sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size + + response_data.effects_size + response_data.sinks_size + + response_data.performance_manager_size + response_data.behavior_size}; + RendererInfo renderer_info{}; + renderer_info.elasped_frame_count = elapsed_frame_count; + std::memcpy(output_params.data() + renderer_info_offset, &renderer_info, + sizeof(RendererInfo)); + } + return MakeResult(output_params); } @@ -447,6 +459,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { } } audio_out->QueueBuffer(stream, tag, std::move(buffer)); + elapsed_frame_count++; } void AudioRenderer::ReleaseAndQueueBuffers() { diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 1f9114c07..f0b691a86 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -196,6 +196,12 @@ struct EffectOutStatus { }; static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size"); +struct RendererInfo { + u64_le elasped_frame_count{}; + INSERT_PADDING_WORDS(2); +}; +static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size"); + struct UpdateDataHeader { UpdateDataHeader() {} @@ -209,7 +215,7 @@ struct UpdateDataHeader { mixes_size = 0x0; sinks_size = config.sink_count * 0x20; performance_manager_size = 0x10; - frame_count = 0; + render_info = 0; total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size + effects_size + sinks_size + performance_manager_size; } @@ -223,8 +229,8 @@ struct UpdateDataHeader { u32_le mixes_size{}; u32_le sinks_size{}; u32_le performance_manager_size{}; - INSERT_PADDING_WORDS(1); - u32_le frame_count{}; + u32_le splitter_size{}; + u32_le render_info{}; INSERT_PADDING_WORDS(4); u32_le total_size{}; }; @@ -258,6 +264,7 @@ private: std::unique_ptr<AudioOut> audio_out; StreamPtr stream; Core::Memory::Memory& memory; + std::size_t elapsed_frame_count{}; }; } // namespace AudioCore diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index c4e0e30fe..41bf5cd4d 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp @@ -193,7 +193,7 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const const std::size_t samples_to_write = num_channels * num_frames; std::size_t samples_written; - if (Settings::values.enable_audio_stretching) { + if (Settings::values.enable_audio_stretching.GetValue()) { const std::vector<s16> in{impl->queue.Pop()}; const std::size_t num_in{in.size() / num_channels}; s16* const out{reinterpret_cast<s16*>(buffer)}; diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index 4ca98f8ea..aab3e979a 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -38,7 +38,7 @@ Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format fo sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} { release_event = Core::Timing::CreateEvent( - name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); }); + name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(cycles_late); }); } void Stream::Play() { @@ -59,15 +59,15 @@ Stream::State Stream::GetState() const { return state; } -s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const { +s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const { const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; - const auto us = - std::chrono::microseconds((static_cast<u64>(num_samples) * 1000000) / sample_rate); - return Core::Timing::usToCycles(us); + const auto ns = + std::chrono::nanoseconds((static_cast<u64>(num_samples) * 1000000000ULL) / sample_rate); + return ns.count(); } static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) { - const float volume{std::clamp(Settings::values.volume - (1.0f - game_volume), 0.0f, 1.0f)}; + const float volume{std::clamp(Settings::Volume() - (1.0f - game_volume), 0.0f, 1.0f)}; if (volume == 1.0f) { return; @@ -80,7 +80,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) { } } -void Stream::PlayNextBuffer() { +void Stream::PlayNextBuffer(s64 cycles_late) { if (!IsPlaying()) { // Ensure we are in playing state before playing the next buffer sink_stream.Flush(); @@ -105,14 +105,17 @@ void Stream::PlayNextBuffer() { sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); - core_timing.ScheduleEvent(GetBufferReleaseCycles(*active_buffer), release_event, {}); + core_timing.ScheduleEvent( + GetBufferReleaseNS(*active_buffer) - + (Settings::values.enable_audio_stretching.GetValue() ? 0 : cycles_late), + release_event, {}); } -void Stream::ReleaseActiveBuffer() { +void Stream::ReleaseActiveBuffer(s64 cycles_late) { ASSERT(active_buffer); released_buffers.push(std::move(active_buffer)); release_callback(); - PlayNextBuffer(); + PlayNextBuffer(cycles_late); } bool Stream::QueueBuffer(BufferPtr&& buffer) { diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h index 1708a4d98..524376257 100644 --- a/src/audio_core/stream.h +++ b/src/audio_core/stream.h @@ -90,13 +90,16 @@ public: private: /// Plays the next queued buffer in the audio stream, starting playback if necessary - void PlayNextBuffer(); + void PlayNextBuffer(s64 cycles_late = 0); /// Releases the actively playing buffer, signalling that it has been completed - void ReleaseActiveBuffer(); + void ReleaseActiveBuffer(s64 cycles_late = 0); /// Gets the number of core cycles when the specified buffer will be released - s64 GetBufferReleaseCycles(const Buffer& buffer) const; + s64 GetBufferReleaseNS(const Buffer& buffer) const; + + /// Gets the number of core cycles when the specified buffer will be released + s64 GetBufferReleaseNSHostTiming(const Buffer& buffer) const; u32 sample_rate; ///< Sample rate of the stream Format format; ///< Format of the stream diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 24b7a083c..d120c8d3d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp DEPENDS # WARNING! It was too much work to try and make a common location for this list, # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well + "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" + "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" @@ -96,6 +98,8 @@ add_library(common STATIC algorithm.h alignment.h assert.h + atomic_ops.cpp + atomic_ops.h detached_tasks.cpp detached_tasks.h bit_field.h @@ -108,6 +112,8 @@ add_library(common STATIC common_types.h dynamic_library.cpp dynamic_library.h + fiber.cpp + fiber.h file_util.cpp file_util.h hash.h @@ -141,6 +147,8 @@ add_library(common STATIC scm_rev.cpp scm_rev.h scope_exit.h + spin_lock.cpp + spin_lock.h string_util.cpp string_util.h swap.h @@ -161,6 +169,8 @@ add_library(common STATIC vector_math.h virtual_buffer.cpp virtual_buffer.h + wall_clock.cpp + wall_clock.h web_result.h zstd_compression.cpp zstd_compression.h @@ -171,12 +181,15 @@ if(ARCHITECTURE_x86_64) PRIVATE x64/cpu_detect.cpp x64/cpu_detect.h + x64/native_clock.cpp + x64/native_clock.h x64/xbyak_abi.h x64/xbyak_util.h ) endif() create_target_directory_groups(common) +find_package(Boost 1.71 COMPONENTS context headers REQUIRED) -target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) +target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile) target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp new file mode 100644 index 000000000..1098e21ff --- /dev/null +++ b/src/common/atomic_ops.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> + +#include "common/atomic_ops.h" + +#if _MSC_VER +#include <intrin.h> +#endif + +namespace Common { + +#if _MSC_VER + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { + u8 result = _InterlockedCompareExchange8((char*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { + u16 result = _InterlockedCompareExchange16((short*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { + u32 result = _InterlockedCompareExchange((long*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { + u64 result = _InterlockedCompareExchange64((__int64*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { + return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], + (__int64*)expected.data()) != 0; +} + +#else + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { + unsigned __int128 value_a; + unsigned __int128 expected_a; + std::memcpy(&value_a, value.data(), sizeof(u128)); + std::memcpy(&expected_a, expected.data(), sizeof(u128)); + return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); +} + +#endif + +} // namespace Common diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h new file mode 100644 index 000000000..e6181d521 --- /dev/null +++ b/src/common/atomic_ops.h @@ -0,0 +1,17 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Common { + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected); +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected); +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected); +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected); +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected); + +} // namespace Common diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp new file mode 100644 index 000000000..1c1d09ccb --- /dev/null +++ b/src/common/fiber.cpp @@ -0,0 +1,222 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/fiber.h" +#if defined(_WIN32) || defined(WIN32) +#include <windows.h> +#else +#include <boost/context/detail/fcontext.hpp> +#endif + +namespace Common { + +constexpr std::size_t default_stack_size = 256 * 1024; // 256kb + +#if defined(_WIN32) || defined(WIN32) + +struct Fiber::FiberImpl { + LPVOID handle = nullptr; + LPVOID rewind_handle = nullptr; +}; + +void Fiber::Start() { + ASSERT(previous_fiber != nullptr); + previous_fiber->guard.unlock(); + previous_fiber.reset(); + entry_point(start_parameter); + UNREACHABLE(); +} + +void Fiber::OnRewind() { + ASSERT(impl->handle != nullptr); + DeleteFiber(impl->handle); + impl->handle = impl->rewind_handle; + impl->rewind_handle = nullptr; + rewind_point(rewind_parameter); + UNREACHABLE(); +} + +void Fiber::FiberStartFunc(void* fiber_parameter) { + auto fiber = static_cast<Fiber*>(fiber_parameter); + fiber->Start(); +} + +void Fiber::RewindStartFunc(void* fiber_parameter) { + auto fiber = static_cast<Fiber*>(fiber_parameter); + fiber->OnRewind(); +} + +Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter) + : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { + impl = std::make_unique<FiberImpl>(); + impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this); +} + +Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {} + +Fiber::~Fiber() { + if (released) { + return; + } + // Make sure the Fiber is not being used + const bool locked = guard.try_lock(); + ASSERT_MSG(locked, "Destroying a fiber that's still running"); + if (locked) { + guard.unlock(); + } + DeleteFiber(impl->handle); +} + +void Fiber::Exit() { + ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); + if (!is_thread_fiber) { + return; + } + ConvertFiberToThread(); + guard.unlock(); + released = true; +} + +void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) { + rewind_point = std::move(rewind_func); + rewind_parameter = start_parameter; +} + +void Fiber::Rewind() { + ASSERT(rewind_point); + ASSERT(impl->rewind_handle == nullptr); + impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this); + SwitchToFiber(impl->rewind_handle); +} + +void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) { + ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); + ASSERT_MSG(to != nullptr, "Next fiber is null!"); + to->guard.lock(); + to->previous_fiber = from; + SwitchToFiber(to->impl->handle); + ASSERT(from->previous_fiber != nullptr); + from->previous_fiber->guard.unlock(); + from->previous_fiber.reset(); +} + +std::shared_ptr<Fiber> Fiber::ThreadToFiber() { + std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()}; + fiber->guard.lock(); + fiber->impl->handle = ConvertThreadToFiber(nullptr); + fiber->is_thread_fiber = true; + return fiber; +} + +#else + +struct Fiber::FiberImpl { + alignas(64) std::array<u8, default_stack_size> stack; + alignas(64) std::array<u8, default_stack_size> rewind_stack; + u8* stack_limit; + u8* rewind_stack_limit; + boost::context::detail::fcontext_t context; + boost::context::detail::fcontext_t rewind_context; +}; + +void Fiber::Start(boost::context::detail::transfer_t& transfer) { + ASSERT(previous_fiber != nullptr); + previous_fiber->impl->context = transfer.fctx; + previous_fiber->guard.unlock(); + previous_fiber.reset(); + entry_point(start_parameter); + UNREACHABLE(); +} + +void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) { + ASSERT(impl->context != nullptr); + impl->context = impl->rewind_context; + impl->rewind_context = nullptr; + u8* tmp = impl->stack_limit; + impl->stack_limit = impl->rewind_stack_limit; + impl->rewind_stack_limit = tmp; + rewind_point(rewind_parameter); + UNREACHABLE(); +} + +void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { + auto fiber = static_cast<Fiber*>(transfer.data); + fiber->Start(transfer); +} + +void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) { + auto fiber = static_cast<Fiber*>(transfer.data); + fiber->OnRewind(transfer); +} + +Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter) + : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { + impl = std::make_unique<FiberImpl>(); + impl->stack_limit = impl->stack.data(); + impl->rewind_stack_limit = impl->rewind_stack.data(); + u8* stack_base = impl->stack_limit + default_stack_size; + impl->context = + boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); +} + +void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) { + rewind_point = std::move(rewind_func); + rewind_parameter = start_parameter; +} + +Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {} + +Fiber::~Fiber() { + if (released) { + return; + } + // Make sure the Fiber is not being used + const bool locked = guard.try_lock(); + ASSERT_MSG(locked, "Destroying a fiber that's still running"); + if (locked) { + guard.unlock(); + } +} + +void Fiber::Exit() { + + ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); + if (!is_thread_fiber) { + return; + } + guard.unlock(); + released = true; +} + +void Fiber::Rewind() { + ASSERT(rewind_point); + ASSERT(impl->rewind_context == nullptr); + u8* stack_base = impl->rewind_stack_limit + default_stack_size; + impl->rewind_context = + boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc); + boost::context::detail::jump_fcontext(impl->rewind_context, this); +} + +void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) { + ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); + ASSERT_MSG(to != nullptr, "Next fiber is null!"); + to->guard.lock(); + to->previous_fiber = from; + auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get()); + ASSERT(from->previous_fiber != nullptr); + from->previous_fiber->impl->context = transfer.fctx; + from->previous_fiber->guard.unlock(); + from->previous_fiber.reset(); +} + +std::shared_ptr<Fiber> Fiber::ThreadToFiber() { + std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()}; + fiber->guard.lock(); + fiber->is_thread_fiber = true; + return fiber; +} + +#endif +} // namespace Common diff --git a/src/common/fiber.h b/src/common/fiber.h new file mode 100644 index 000000000..dafc1100e --- /dev/null +++ b/src/common/fiber.h @@ -0,0 +1,92 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <memory> + +#include "common/common_types.h" +#include "common/spin_lock.h" + +#if !defined(_WIN32) && !defined(WIN32) +namespace boost::context::detail { +struct transfer_t; +} +#endif + +namespace Common { + +/** + * Fiber class + * a fiber is a userspace thread with it's own context. They can be used to + * implement coroutines, emulated threading systems and certain asynchronous + * patterns. + * + * This class implements fibers at a low level, thus allowing greater freedom + * to implement such patterns. This fiber class is 'threadsafe' only one fiber + * can be running at a time and threads will be locked while trying to yield to + * a running fiber until it yields. WARNING exchanging two running fibers between + * threads will cause a deadlock. In order to prevent a deadlock, each thread should + * have an intermediary fiber, you switch to the intermediary fiber of the current + * thread and then from it switch to the expected fiber. This way you can exchange + * 2 fibers within 2 different threads. + */ +class Fiber { +public: + Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter); + ~Fiber(); + + Fiber(const Fiber&) = delete; + Fiber& operator=(const Fiber&) = delete; + + Fiber(Fiber&&) = default; + Fiber& operator=(Fiber&&) = default; + + /// Yields control from Fiber 'from' to Fiber 'to' + /// Fiber 'from' must be the currently running fiber. + static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to); + static std::shared_ptr<Fiber> ThreadToFiber(); + + void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter); + + void Rewind(); + + /// Only call from main thread's fiber + void Exit(); + + /// Changes the start parameter of the fiber. Has no effect if the fiber already started + void SetStartParameter(void* new_parameter) { + start_parameter = new_parameter; + } + +private: + Fiber(); + +#if defined(_WIN32) || defined(WIN32) + void OnRewind(); + void Start(); + static void FiberStartFunc(void* fiber_parameter); + static void RewindStartFunc(void* fiber_parameter); +#else + void OnRewind(boost::context::detail::transfer_t& transfer); + void Start(boost::context::detail::transfer_t& transfer); + static void FiberStartFunc(boost::context::detail::transfer_t transfer); + static void RewindStartFunc(boost::context::detail::transfer_t transfer); +#endif + + struct FiberImpl; + + SpinLock guard{}; + std::function<void(void*)> entry_point; + std::function<void(void*)> rewind_point; + void* rewind_parameter{}; + void* start_parameter{}; + std::shared_ptr<Fiber> previous_fiber; + std::unique_ptr<FiberImpl> impl; + bool is_thread_fiber{}; + bool released{}; +}; + +} // namespace Common diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp index 3fdc309a2..8cff6ec37 100644 --- a/src/common/memory_detect.cpp +++ b/src/common/memory_detect.cpp @@ -9,10 +9,12 @@ // clang-format on #else #include <sys/types.h> -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__FreeBSD__) #include <sys/sysctl.h> -#else +#elif defined(__linux__) #include <sys/sysinfo.h> +#else +#include <unistd.h> #endif #endif @@ -38,15 +40,26 @@ static MemoryInfo Detect() { // hw and vm are defined in sysctl.h // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471 // sysctlbyname(const char *, void *, size_t *, void *, size_t); - sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0); - sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0); + sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, nullptr, 0); + sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, nullptr, 0); mem_info.TotalPhysicalMemory = ramsize; mem_info.TotalSwapMemory = vmusage.xsu_total; -#else +#elif defined(__FreeBSD__) + u_long physmem, swap_total; + std::size_t sizeof_u_long = sizeof(u_long); + // sysctlbyname(const char *, void *, size_t *, const void *, size_t); + sysctlbyname("hw.physmem", &physmem, &sizeof_u_long, nullptr, 0); + sysctlbyname("vm.swap_total", &swap_total, &sizeof_u_long, nullptr, 0); + mem_info.TotalPhysicalMemory = physmem; + mem_info.TotalSwapMemory = swap_total; +#elif defined(__linux__) struct sysinfo meminfo; sysinfo(&meminfo); mem_info.TotalPhysicalMemory = meminfo.totalram; mem_info.TotalSwapMemory = meminfo.totalswap; +#else + mem_info.TotalPhysicalMemory = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); + mem_info.TotalSwapMemory = 0; #endif return mem_info; diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp new file mode 100644 index 000000000..c1524220f --- /dev/null +++ b/src/common/spin_lock.cpp @@ -0,0 +1,54 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/spin_lock.h" + +#if _MSC_VER +#include <intrin.h> +#if _M_AMD64 +#define __x86_64__ 1 +#endif +#if _M_ARM64 +#define __aarch64__ 1 +#endif +#else +#if __x86_64__ +#include <xmmintrin.h> +#endif +#endif + +namespace { + +void ThreadPause() { +#if __x86_64__ + _mm_pause(); +#elif __aarch64__ && _MSC_VER + __yield(); +#elif __aarch64__ + asm("yield"); +#endif +} + +} // Anonymous namespace + +namespace Common { + +void SpinLock::lock() { + while (lck.test_and_set(std::memory_order_acquire)) { + ThreadPause(); + } +} + +void SpinLock::unlock() { + lck.clear(std::memory_order_release); +} + +bool SpinLock::try_lock() { + if (lck.test_and_set(std::memory_order_acquire)) { + return false; + } + return true; +} + +} // namespace Common diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h new file mode 100644 index 000000000..1df5528c4 --- /dev/null +++ b/src/common/spin_lock.h @@ -0,0 +1,26 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> + +namespace Common { + +/** + * SpinLock class + * a lock similar to mutex that forces a thread to spin wait instead calling the + * supervisor. Should be used on short sequences of code. + */ +class SpinLock { +public: + void lock(); + void unlock(); + bool try_lock(); + +private: + std::atomic_flag lck = ATOMIC_FLAG_INIT; +}; + +} // namespace Common diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 200c6489a..16d42facd 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) { fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); + fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 0cd2d10bf..8e5935e6a 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -25,6 +25,52 @@ namespace Common { +#ifdef _WIN32 + +void SetCurrentThreadPriority(ThreadPriority new_priority) { + auto handle = GetCurrentThread(); + int windows_priority = 0; + switch (new_priority) { + case ThreadPriority::Low: + windows_priority = THREAD_PRIORITY_BELOW_NORMAL; + break; + case ThreadPriority::Normal: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + case ThreadPriority::High: + windows_priority = THREAD_PRIORITY_ABOVE_NORMAL; + break; + case ThreadPriority::VeryHigh: + windows_priority = THREAD_PRIORITY_HIGHEST; + break; + default: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + } + SetThreadPriority(handle, windows_priority); +} + +#else + +void SetCurrentThreadPriority(ThreadPriority new_priority) { + pthread_t this_thread = pthread_self(); + + s32 max_prio = sched_get_priority_max(SCHED_OTHER); + s32 min_prio = sched_get_priority_min(SCHED_OTHER); + u32 level = static_cast<u32>(new_priority) + 1; + + struct sched_param params; + if (max_prio > min_prio) { + params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4; + } else { + params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; + } + + pthread_setschedparam(this_thread, SCHED_OTHER, ¶ms); +} + +#endif + #ifdef _MSC_VER // Sets the debugger-visible name of the current thread. @@ -70,6 +116,12 @@ void SetCurrentThreadName(const char* name) { } #endif +#if defined(_WIN32) +void SetCurrentThreadName(const char* name) { + // Do Nothing on MingW +} +#endif + #endif } // namespace Common diff --git a/src/common/thread.h b/src/common/thread.h index 2fc071685..52b359413 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -9,6 +9,7 @@ #include <cstddef> #include <mutex> #include <thread> +#include "common/common_types.h" namespace Common { @@ -28,8 +29,7 @@ public: is_set = false; } - template <class Duration> - bool WaitFor(const std::chrono::duration<Duration>& time) { + bool WaitFor(const std::chrono::nanoseconds& time) { std::unique_lock lk{mutex}; if (!condvar.wait_for(lk, time, [this] { return is_set; })) return false; @@ -86,6 +86,15 @@ private: std::size_t generation = 0; // Incremented once each time the barrier is used }; +enum class ThreadPriority : u32 { + Low = 0, + Normal = 1, + High = 2, + VeryHigh = 3, +}; + +void SetCurrentThreadPriority(ThreadPriority new_priority); + void SetCurrentThreadName(const char* name); } // namespace Common diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index 32bf56730..16bf7c828 100644 --- a/src/common/uint128.cpp +++ b/src/common/uint128.cpp @@ -6,12 +6,38 @@ #include <intrin.h> #pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128) #endif #include <cstring> #include "common/uint128.h" namespace Common { +#ifdef _MSC_VER + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { + u128 r{}; + r[0] = _umul128(a, b, &r[1]); + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], d, &remainder); +#else + return _udiv128(r[1], r[0], d, &remainder); +#endif +} + +#else + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +} + +#endif + u128 Multiply64Into128(u64 a, u64 b) { u128 result; #ifdef _MSC_VER diff --git a/src/common/uint128.h b/src/common/uint128.h index a3be2a2cb..503cd2d0c 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -9,6 +9,9 @@ namespace Common { +// This function multiplies 2 u64 values and divides it by a u64 value. +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); + // This function multiplies 2 u64 values and produces a u128 value; u128 Multiply64Into128(u64 a, u64 b); diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp new file mode 100644 index 000000000..3afbdb898 --- /dev/null +++ b/src/common/wall_clock.cpp @@ -0,0 +1,91 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/uint128.h" +#include "common/wall_clock.h" + +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/cpu_detect.h" +#include "common/x64/native_clock.h" +#endif + +namespace Common { + +using base_timer = std::chrono::steady_clock; +using base_time_point = std::chrono::time_point<base_timer>; + +class StandardWallClock : public WallClock { +public: + StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency) + : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) { + start_time = base_timer::now(); + } + + std::chrono::nanoseconds GetTimeNS() override { + base_time_point current = base_timer::now(); + auto elapsed = current - start_time; + return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed); + } + + std::chrono::microseconds GetTimeUS() override { + base_time_point current = base_timer::now(); + auto elapsed = current - start_time; + return std::chrono::duration_cast<std::chrono::microseconds>(elapsed); + } + + std::chrono::milliseconds GetTimeMS() override { + base_time_point current = base_timer::now(); + auto elapsed = current - start_time; + return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed); + } + + u64 GetClockCycles() override { + std::chrono::nanoseconds time_now = GetTimeNS(); + const u128 temporary = + Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); + return Common::Divide128On32(temporary, 1000000000).first; + } + + u64 GetCPUCycles() override { + std::chrono::nanoseconds time_now = GetTimeNS(); + const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); + return Common::Divide128On32(temporary, 1000000000).first; + } + + void Pause(bool is_paused) override { + // Do nothing in this clock type. + } + +private: + base_time_point start_time; +}; + +#ifdef ARCHITECTURE_x86_64 + +std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, + u32 emulated_clock_frequency) { + const auto& caps = GetCPUCaps(); + u64 rtsc_frequency = 0; + if (caps.invariant_tsc) { + rtsc_frequency = EstimateRDTSCFrequency(); + } + if (rtsc_frequency == 0) { + return std::make_unique<StandardWallClock>(emulated_cpu_frequency, + emulated_clock_frequency); + } else { + return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, + rtsc_frequency); + } +} + +#else + +std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, + u32 emulated_clock_frequency) { + return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); +} + +#endif + +} // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h new file mode 100644 index 000000000..367d72134 --- /dev/null +++ b/src/common/wall_clock.h @@ -0,0 +1,53 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <memory> + +#include "common/common_types.h" + +namespace Common { + +class WallClock { +public: + /// Returns current wall time in nanoseconds + virtual std::chrono::nanoseconds GetTimeNS() = 0; + + /// Returns current wall time in microseconds + virtual std::chrono::microseconds GetTimeUS() = 0; + + /// Returns current wall time in milliseconds + virtual std::chrono::milliseconds GetTimeMS() = 0; + + /// Returns current wall time in emulated clock cycles + virtual u64 GetClockCycles() = 0; + + /// Returns current wall time in emulated cpu cycles + virtual u64 GetCPUCycles() = 0; + + virtual void Pause(bool is_paused) = 0; + + /// Tells if the wall clock, uses the host CPU's hardware clock + bool IsNative() const { + return is_native; + } + +protected: + WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native) + : emulated_cpu_frequency{emulated_cpu_frequency}, + emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {} + + u64 emulated_cpu_frequency; + u64 emulated_clock_frequency; + +private: + bool is_native; +}; + +std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, + u32 emulated_clock_frequency); + +} // namespace Common diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..fccd2eee5 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -62,6 +62,17 @@ static CPUCaps Detect() { std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); + if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) + caps.manufacturer = Manufacturer::Intel; + else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) + caps.manufacturer = Manufacturer::AMD; + else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) + caps.manufacturer = Manufacturer::Hygon; + else + caps.manufacturer = Manufacturer::Unknown; + + u32 family = {}; + u32 model = {}; __cpuid(cpu_id, 0x80000000); @@ -73,6 +84,14 @@ static CPUCaps Detect() { // Detect family and other miscellaneous features if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); + family = (cpu_id[0] >> 8) & 0xf; + model = (cpu_id[0] >> 4) & 0xf; + if (family == 0xf) { + family += (cpu_id[0] >> 20) & 0xff; + } + if (family >= 6) { + model += ((cpu_id[0] >> 16) & 0xf) << 4; + } if ((cpu_id[3] >> 25) & 1) caps.sse = true; @@ -110,6 +129,11 @@ static CPUCaps Detect() { caps.bmi1 = true; if ((cpu_id[1] >> 8) & 1) caps.bmi2 = true; + // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) + if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && + (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { + caps.avx512 = caps.avx2; + } } } @@ -130,6 +154,20 @@ static CPUCaps Detect() { caps.fma4 = true; } + if (max_ex_fn >= 0x80000007) { + __cpuid(cpu_id, 0x80000007); + if (cpu_id[3] & (1 << 8)) { + caps.invariant_tsc = true; + } + } + + if (max_std_fn >= 0x16) { + __cpuid(cpu_id, 0x16); + caps.base_frequency = cpu_id[0]; + caps.max_frequency = cpu_id[1]; + caps.bus_frequency = cpu_id[2]; + } + return caps; } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..e3b63302e 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -6,8 +6,16 @@ namespace Common { +enum class Manufacturer : u32 { + Intel = 0, + AMD = 1, + Hygon = 2, + Unknown = 3, +}; + /// x86/x64 CPU capabilities that may be detected by this module struct CPUCaps { + Manufacturer manufacturer; char cpu_string[0x21]; char brand_string[0x41]; bool sse; @@ -19,11 +27,16 @@ struct CPUCaps { bool lzcnt; bool avx; bool avx2; + bool avx512; bool bmi1; bool bmi2; bool fma; bool fma4; bool aes; + bool invariant_tsc; + u32 base_frequency; + u32 max_frequency; + u32 bus_frequency; }; /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp new file mode 100644 index 000000000..424b39b1f --- /dev/null +++ b/src/common/x64/native_clock.cpp @@ -0,0 +1,103 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <mutex> +#include <thread> + +#ifdef _MSC_VER +#include <intrin.h> +#else +#include <x86intrin.h> +#endif + +#include "common/uint128.h" +#include "common/x64/native_clock.h" + +namespace Common { + +u64 EstimateRDTSCFrequency() { + const auto milli_10 = std::chrono::milliseconds{10}; + // get current time + _mm_mfence(); + const u64 tscStart = __rdtsc(); + const auto startTime = std::chrono::high_resolution_clock::now(); + // wait roughly 3 seconds + while (true) { + auto milli = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::high_resolution_clock::now() - startTime); + if (milli.count() >= 3000) + break; + std::this_thread::sleep_for(milli_10); + } + const auto endTime = std::chrono::high_resolution_clock::now(); + _mm_mfence(); + const u64 tscEnd = __rdtsc(); + // calculate difference + const u64 timer_diff = + std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count(); + const u64 tsc_diff = tscEnd - tscStart; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); + return tsc_freq; +} + +namespace X64 { +NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, + u64 rtsc_frequency) + : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{ + rtsc_frequency} { + _mm_mfence(); + last_measure = __rdtsc(); + accumulated_ticks = 0U; +} + +u64 NativeClock::GetRTSC() { + std::scoped_lock scope{rtsc_serialize}; + _mm_mfence(); + const u64 current_measure = __rdtsc(); + u64 diff = current_measure - last_measure; + diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) + if (current_measure > last_measure) { + last_measure = current_measure; + } + accumulated_ticks += diff; + /// The clock cannot be more precise than the guest timer, remove the lower bits + return accumulated_ticks & inaccuracy_mask; +} + +void NativeClock::Pause(bool is_paused) { + if (!is_paused) { + _mm_mfence(); + last_measure = __rdtsc(); + } +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; +} + +u64 NativeClock::GetClockCycles() { + const u64 rtsc_value = GetRTSC(); + return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); +} + +u64 NativeClock::GetCPUCycles() { + const u64 rtsc_value = GetRTSC(); + return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); +} + +} // namespace X64 + +} // namespace Common diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h new file mode 100644 index 000000000..891a3bbfd --- /dev/null +++ b/src/common/x64/native_clock.h @@ -0,0 +1,48 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> + +#include "common/spin_lock.h" +#include "common/wall_clock.h" + +namespace Common { + +namespace X64 { +class NativeClock : public WallClock { +public: + NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency); + + std::chrono::nanoseconds GetTimeNS() override; + + std::chrono::microseconds GetTimeUS() override; + + std::chrono::milliseconds GetTimeMS() override; + + u64 GetClockCycles() override; + + u64 GetCPUCycles() override; + + void Pause(bool is_paused) override; + +private: + u64 GetRTSC(); + + /// value used to reduce the native clocks accuracy as some apss rely on + /// undefined behavior where the level of accuracy in the clock shouldn't + /// be higher. + static constexpr u64 inaccuracy_mask = ~(0x400 - 1); + + SpinLock rtsc_serialize{}; + u64 last_measure{}; + u64 accumulated_ticks{}; + u64 rtsc_frequency; +}; +} // namespace X64 + +u64 EstimateRDTSCFrequency(); + +} // namespace Common diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,7 +11,7 @@ namespace Common::X64 { -inline int RegToIndex(const Xbyak::Reg& reg) { +inline std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); } -inline Xbyak::Reg64 IndexToReg64(int reg_index) { +inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); - return Xbyak::Reg64(reg_index); + return Xbyak::Reg64(static_cast<int>(reg_index)); } -inline Xbyak::Xmm IndexToXmm(int reg_index) { +inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); - return Xbyak::Xmm(reg_index - 16); + return Xbyak::Xmm(static_cast<int>(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(int reg_index) { +inline Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; #endif -inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, - size_t needed_frame_size, s32* out_subtraction, - s32* out_xmm_offset) { +struct ABIFrameInfo { + s32 subtraction; + s32 xmm_offset; +}; + +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, + size_t needed_frame_size) { const auto count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; size_t subtraction = 0; @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, rsp_alignment -= subtraction; subtraction += rsp_alignment & 0xF; - *out_subtraction = (s32)subtraction; - *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); + return ABIFrameInfo{static_cast<s32>(subtraction), + static_cast<s32>(subtraction - xmm_base_subtraction)}; } inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); + for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); + code.push(IndexToReg64(i)); } } - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - for (int i = 0; i < regs.count(); i++) { - if (regs.test(i) & ABI_ALL_GPRS.test(i)) { - code.push(IndexToReg64(i)); - } + if (frame_info.subtraction != 0) { + code.sub(code.rsp, frame_info.subtraction); } for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); - xmm_offset += 0x10; + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); + frame_info.xmm_offset += 0x10; } } @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); - xmm_offset += 0x10; + code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; } } - if (subtraction != 0) { - code.add(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.add(code.rsp, frame_info.subtraction); } // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { - code.pop(IndexToReg64(i)); - } - } -} - -inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, - size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - for (std::size_t i = 0; i < regs.size(); ++i) { + for (std::size_t j = 0; j < regs.size(); ++j) { + const std::size_t i = regs.size() - j - 1; if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); - } - } - - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - - return ABI_SHADOW_SPACE; -} - -inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - if (subtraction != 0) { - code.add(code.rsp, subtraction); - } - - // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { code.pop(IndexToReg64(i)); } } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 47418006b..d1f173f42 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -7,6 +7,16 @@ endif() add_library(core STATIC arm/arm_interface.h arm/arm_interface.cpp + arm/cpu_interrupt_handler.cpp + arm/cpu_interrupt_handler.h + arm/dynarmic/arm_dynarmic_32.cpp + arm/dynarmic/arm_dynarmic_32.h + arm/dynarmic/arm_dynarmic_64.cpp + arm/dynarmic/arm_dynarmic_64.h + arm/dynarmic/arm_dynarmic_cp15.cpp + arm/dynarmic/arm_dynarmic_cp15.h + arm/dynarmic/arm_exclusive_monitor.cpp + arm/dynarmic/arm_exclusive_monitor.h arm/exclusive_monitor.cpp arm/exclusive_monitor.h arm/unicorn/arm_unicorn.cpp @@ -15,8 +25,6 @@ add_library(core STATIC constants.h core.cpp core.h - core_manager.cpp - core_manager.h core_timing.cpp core_timing.h core_timing_util.cpp @@ -606,11 +614,11 @@ endif() create_target_directory_groups(core) target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) -target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn) +target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls opus unicorn zip) if (YUZU_ENABLE_BOXCAT) target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT) - target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json zip) + target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json) endif() if (ENABLE_WEB_SERVICE) diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index d079a1bc8..d2295ed90 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -139,6 +139,63 @@ std::optional<std::string> GetSymbolName(const Symbols& symbols, VAddr func_addr constexpr u64 SEGMENT_BASE = 0x7100000000ull; +std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext( + System& system, const ThreadContext64& ctx) { + std::vector<BacktraceEntry> out; + auto& memory = system.Memory(); + + auto fp = ctx.cpu_registers[29]; + auto lr = ctx.cpu_registers[30]; + while (true) { + out.push_back({"", 0, lr, 0}); + if (!fp) { + break; + } + lr = memory.Read64(fp + 8) - 4; + fp = memory.Read64(fp); + } + + std::map<VAddr, std::string> modules; + auto& loader{system.GetAppLoader()}; + if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) { + return {}; + } + + std::map<std::string, Symbols> symbols; + for (const auto& module : modules) { + symbols.insert_or_assign(module.second, GetSymbols(module.first, memory)); + } + + for (auto& entry : out) { + VAddr base = 0; + for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) { + const auto& module{*iter}; + if (entry.original_address >= module.first) { + entry.module = module.second; + base = module.first; + break; + } + } + + entry.offset = entry.original_address - base; + entry.address = SEGMENT_BASE + entry.offset; + + if (entry.module.empty()) + entry.module = "unknown"; + + const auto symbol_set = symbols.find(entry.module); + if (symbol_set != symbols.end()) { + const auto symbol = GetSymbolName(symbol_set->second, entry.offset); + if (symbol.has_value()) { + // TODO(DarkLordZach): Add demangling of symbol names. + entry.name = *symbol; + } + } + } + + return out; +} + std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const { std::vector<BacktraceEntry> out; auto& memory = system.Memory(); diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index cb2e640e2..1f24051e4 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -7,6 +7,7 @@ #include <array> #include <vector> #include "common/common_types.h" +#include "core/hardware_properties.h" namespace Common { struct PageTable; @@ -18,25 +19,29 @@ enum class VMAPermission : u8; namespace Core { class System; +class CPUInterruptHandler; + +using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>; /// Generic ARMv8 CPU interface class ARM_Interface : NonCopyable { public: - explicit ARM_Interface(System& system_) : system{system_} {} + explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock) + : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{ + uses_wall_clock} {} virtual ~ARM_Interface() = default; struct ThreadContext32 { std::array<u32, 16> cpu_registers{}; + std::array<u32, 64> extension_registers{}; u32 cpsr{}; - std::array<u8, 4> padding{}; - std::array<u64, 32> fprs{}; u32 fpscr{}; u32 fpexc{}; u32 tpidr{}; }; // Internally within the kernel, it expects the AArch32 version of the // thread context to be 344 bytes in size. - static_assert(sizeof(ThreadContext32) == 0x158); + static_assert(sizeof(ThreadContext32) == 0x150); struct ThreadContext64 { std::array<u64, 31> cpu_registers{}; @@ -143,6 +148,8 @@ public: */ virtual void SetTPIDR_EL0(u64 value) = 0; + virtual void ChangeProcessorID(std::size_t new_core_id) = 0; + virtual void SaveContext(ThreadContext32& ctx) = 0; virtual void SaveContext(ThreadContext64& ctx) = 0; virtual void LoadContext(const ThreadContext32& ctx) = 0; @@ -162,6 +169,9 @@ public: std::string name; }; + static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system, + const ThreadContext64& ctx); + std::vector<BacktraceEntry> GetBacktrace() const; /// fp (= r29) points to the last frame record. @@ -175,6 +185,8 @@ public: protected: /// System context that this ARM interface is running under. System& system; + CPUInterrupts& interrupt_handlers; + bool uses_wall_clock; }; } // namespace Core diff --git a/src/core/arm/cpu_interrupt_handler.cpp b/src/core/arm/cpu_interrupt_handler.cpp new file mode 100644 index 000000000..df0350881 --- /dev/null +++ b/src/core/arm/cpu_interrupt_handler.cpp @@ -0,0 +1,27 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/thread.h" +#include "core/arm/cpu_interrupt_handler.h" + +namespace Core { + +CPUInterruptHandler::CPUInterruptHandler() : is_interrupted{} { + interrupt_event = std::make_unique<Common::Event>(); +} + +CPUInterruptHandler::~CPUInterruptHandler() = default; + +void CPUInterruptHandler::SetInterrupt(bool is_interrupted_) { + if (is_interrupted_) { + interrupt_event->Set(); + } + this->is_interrupted = is_interrupted_; +} + +void CPUInterruptHandler::AwaitInterrupt() { + interrupt_event->Wait(); +} + +} // namespace Core diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h new file mode 100644 index 000000000..3d062d326 --- /dev/null +++ b/src/core/arm/cpu_interrupt_handler.h @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> + +namespace Common { +class Event; +} + +namespace Core { + +class CPUInterruptHandler { +public: + CPUInterruptHandler(); + ~CPUInterruptHandler(); + + CPUInterruptHandler(const CPUInterruptHandler&) = delete; + CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete; + + CPUInterruptHandler(CPUInterruptHandler&&) = default; + CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default; + + bool IsInterrupted() const { + return is_interrupted; + } + + void SetInterrupt(bool is_interrupted); + + void AwaitInterrupt(); + +private: + bool is_interrupted{}; + std::unique_ptr<Common::Event> interrupt_event; +}; + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 9bc86e3b9..0d4ab95b7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -7,15 +7,17 @@ #include <dynarmic/A32/a32.h> #include <dynarmic/A32/config.h> #include <dynarmic/A32/context.h> -#include "common/microprofile.h" +#include "common/logging/log.h" +#include "common/page_table.h" +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/dynarmic/arm_dynarmic_32.h" -#include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/arm/dynarmic/arm_exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/hle/kernel/svc.h" #include "core/memory.h" +#include "core/settings.h" namespace Core { @@ -49,8 +51,22 @@ public: parent.system.Memory().Write64(vaddr, value); } + bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override { + return parent.system.Memory().WriteExclusive8(vaddr, value, expected); + } + bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override { + return parent.system.Memory().WriteExclusive16(vaddr, value, expected); + } + bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override { + return parent.system.Memory().WriteExclusive32(vaddr, value, expected); + } + bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override { + return parent.system.Memory().WriteExclusive64(vaddr, value, expected); + } + void InterpreterFallback(u32 pc, std::size_t num_instructions) override { - UNIMPLEMENTED(); + UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc, + MemoryReadCode(pc)); } void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { @@ -61,7 +77,7 @@ public: case Dynarmic::A32::Exception::Breakpoint: break; } - LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", + LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); UNIMPLEMENTED(); } @@ -71,26 +87,36 @@ public: } void AddTicks(u64 ticks) override { + if (parent.uses_wall_clock) { + return; + } // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a // rough approximation of the amount of executed ticks in the system, it may be thrown off // if not all cores are doing a similar amount of work. Instead of doing this, we should // device a way so that timing is consistent across all cores without increasing the ticks 4 // times. - u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; + u64 amortized_ticks = + (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES; // Always execute at least one tick. amortized_ticks = std::max<u64>(amortized_ticks, 1); parent.system.CoreTiming().AddTicks(amortized_ticks); num_interpreted_instructions = 0; } + u64 GetTicksRemaining() override { - return std::max(parent.system.CoreTiming().GetDowncount(), {}); + if (parent.uses_wall_clock) { + if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { + return minimum_run_cycles; + } + return 0U; + } + return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0); } ARM_Dynarmic_32& parent; std::size_t num_interpreted_instructions{}; - u64 tpidrro_el0{}; - u64 tpidr_el0{}; + static constexpr u64 minimum_run_cycles = 1000U; }; std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, @@ -99,26 +125,46 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& config.callbacks = cb.get(); // TODO(bunnei): Implement page table for 32-bit // config.page_table = &page_table.pointers; - config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); + config.coprocessors[15] = cp15; config.define_unpredictable_behaviour = true; + static constexpr std::size_t PAGE_BITS = 12; + static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS); + config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( + page_table.pointers.data()); + config.absolute_offset_page_table = true; + config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; + config.only_detect_misalignment_via_page_table_on_page_boundary = true; + + // Multi-process state + config.processor_id = core_index; + config.global_monitor = &exclusive_monitor.monitor; + + // Timing + config.wall_clock_cntpct = uses_wall_clock; + + // Optimizations + if (Settings::values.disable_cpu_opt) { + config.enable_optimizations = false; + config.enable_fast_dispatch = false; + } + return std::make_unique<Dynarmic::A32::Jit>(config); } -MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); - void ARM_Dynarmic_32::Run() { - MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32); jit->Run(); } void ARM_Dynarmic_32::Step() { - cb->InterpreterFallback(jit->Regs()[15], 1); + jit->Step(); } -ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, +ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, + bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ARM_Interface{system}, - cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, + : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, + cb(std::make_unique<DynarmicCallbacks32>(*this)), + cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index}, exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; @@ -154,32 +200,40 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) { } u64 ARM_Dynarmic_32::GetTlsAddress() const { - return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; + return cp15->uro; } void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { - CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); + cp15->uro = static_cast<u32>(address); } u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { - return cb->tpidr_el0; + return cp15->uprw; } void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { - cb->tpidr_el0 = value; + cp15->uprw = static_cast<u32>(value); +} + +void ARM_Dynarmic_32::ChangeProcessorID(std::size_t new_core_id) { + jit->ChangeProcessorID(new_core_id); } void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { Dynarmic::A32::Context context; jit->SaveContext(context); ctx.cpu_registers = context.Regs(); + ctx.extension_registers = context.ExtRegs(); ctx.cpsr = context.Cpsr(); + ctx.fpscr = context.Fpscr(); } void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) { Dynarmic::A32::Context context; context.Regs() = ctx.cpu_registers; + context.ExtRegs() = ctx.extension_registers; context.SetCpsr(ctx.cpsr); + context.SetFpscr(ctx.fpscr); jit->LoadContext(context); } @@ -188,10 +242,15 @@ void ARM_Dynarmic_32::PrepareReschedule() { } void ARM_Dynarmic_32::ClearInstructionCache() { + if (!jit) { + return; + } jit->ClearCache(); } -void ARM_Dynarmic_32::ClearExclusiveState() {} +void ARM_Dynarmic_32::ClearExclusiveState() { + jit->ClearExclusiveState(); +} void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table, std::size_t new_address_space_size_in_bits) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 8ba9cea8f..2bab31b92 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -9,7 +9,7 @@ #include <dynarmic/A32/a32.h> #include <dynarmic/A64/a64.h> -#include <dynarmic/A64/exclusive_monitor.h> +#include <dynarmic/exclusive_monitor.h> #include "common/common_types.h" #include "common/hash.h" #include "core/arm/arm_interface.h" @@ -21,13 +21,16 @@ class Memory; namespace Core { +class CPUInterruptHandler; class DynarmicCallbacks32; +class DynarmicCP15; class DynarmicExclusiveMonitor; class System; class ARM_Dynarmic_32 final : public ARM_Interface { public: - ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, + ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic_32() override; void SetPC(u64 pc) override; @@ -44,6 +47,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; + void ChangeProcessorID(std::size_t new_core_id) override; void SaveContext(ThreadContext32& ctx) override; void SaveContext(ThreadContext64& ctx) override {} @@ -66,12 +70,14 @@ private: std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; friend class DynarmicCallbacks32; + friend class DynarmicCP15; + std::unique_ptr<DynarmicCallbacks32> cb; JitCacheType jit_cache; std::shared_ptr<Dynarmic::A32::Jit> jit; + std::shared_ptr<DynarmicCP15> cp15; std::size_t core_index; DynarmicExclusiveMonitor& exclusive_monitor; - std::array<u32, 84> CP15_regs{}; }; } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 337b97be9..790981034 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -7,11 +7,11 @@ #include <dynarmic/A64/a64.h> #include <dynarmic/A64/config.h> #include "common/logging/log.h" -#include "common/microprofile.h" #include "common/page_table.h" +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/dynarmic/arm_exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" @@ -65,6 +65,22 @@ public: memory.Write64(vaddr + 8, value[1]); } + bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override { + return parent.system.Memory().WriteExclusive8(vaddr, value, expected); + } + bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override { + return parent.system.Memory().WriteExclusive16(vaddr, value, expected); + } + bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override { + return parent.system.Memory().WriteExclusive32(vaddr, value, expected); + } + bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override { + return parent.system.Memory().WriteExclusive64(vaddr, value, expected); + } + bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override { + return parent.system.Memory().WriteExclusive128(vaddr, value, expected); + } + void InterpreterFallback(u64 pc, std::size_t num_instructions) override { LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, num_instructions, MemoryReadCode(pc)); @@ -98,8 +114,8 @@ public: } [[fallthrough]]; default: - ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})", - static_cast<std::size_t>(exception), pc); + ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", + static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); } } @@ -108,29 +124,42 @@ public: } void AddTicks(u64 ticks) override { + if (parent.uses_wall_clock) { + return; + } // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a // rough approximation of the amount of executed ticks in the system, it may be thrown off // if not all cores are doing a similar amount of work. Instead of doing this, we should // device a way so that timing is consistent across all cores without increasing the ticks 4 // times. - u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; + u64 amortized_ticks = + (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES; // Always execute at least one tick. amortized_ticks = std::max<u64>(amortized_ticks, 1); parent.system.CoreTiming().AddTicks(amortized_ticks); num_interpreted_instructions = 0; } + u64 GetTicksRemaining() override { - return std::max(parent.system.CoreTiming().GetDowncount(), s64{0}); + if (parent.uses_wall_clock) { + if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { + return minimum_run_cycles; + } + return 0U; + } + return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0); } + u64 GetCNTPCT() override { - return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); + return parent.system.CoreTiming().GetClockTicks(); } ARM_Dynarmic_64& parent; std::size_t num_interpreted_instructions = 0; u64 tpidrro_el0 = 0; u64 tpidr_el0 = 0; + static constexpr u64 minimum_run_cycles = 1000U; }; std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, @@ -168,14 +197,13 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& config.enable_fast_dispatch = false; } + // Timing + config.wall_clock_cntpct = uses_wall_clock; + return std::make_shared<Dynarmic::A64::Jit>(config); } -MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); - void ARM_Dynarmic_64::Run() { - MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64); - jit->Run(); } @@ -183,11 +211,16 @@ void ARM_Dynarmic_64::Step() { cb->InterpreterFallback(jit->GetPC(), 1); } -ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, +ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, + bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks64>(*this)), - inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index}, - exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} + : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, + cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers, + uses_wall_clock, + ARM_Unicorn::Arch::AArch64, + core_index}, + core_index{core_index}, exclusive_monitor{ + dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; @@ -239,6 +272,10 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) { cb->tpidr_el0 = value; } +void ARM_Dynarmic_64::ChangeProcessorID(std::size_t new_core_id) { + jit->ChangeProcessorID(new_core_id); +} + void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) { ctx.cpu_registers = jit->GetRegisters(); ctx.sp = jit->GetSP(); @@ -266,6 +303,9 @@ void ARM_Dynarmic_64::PrepareReschedule() { } void ARM_Dynarmic_64::ClearInstructionCache() { + if (!jit) { + return; + } jit->ClearCache(); } @@ -285,44 +325,4 @@ void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table, jit_cache.emplace(key, jit); } -DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) - : monitor(core_count), memory{memory} {} - -DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; - -void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) { - // Size doesn't actually matter. - monitor.Mark(core_index, addr, 16); -} - -void DynarmicExclusiveMonitor::ClearExclusive() { - monitor.Clear(); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 2, - [&] { memory.Write16(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 4, - [&] { memory.Write32(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 8, - [&] { memory.Write64(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] { - memory.Write64(vaddr + 0, value[0]); - memory.Write64(vaddr + 8, value[1]); - }); -} - } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 647cecaf0..403c55961 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -8,7 +8,6 @@ #include <unordered_map> #include <dynarmic/A64/a64.h> -#include <dynarmic/A64/exclusive_monitor.h> #include "common/common_types.h" #include "common/hash.h" #include "core/arm/arm_interface.h" @@ -22,12 +21,14 @@ class Memory; namespace Core { class DynarmicCallbacks64; +class CPUInterruptHandler; class DynarmicExclusiveMonitor; class System; class ARM_Dynarmic_64 final : public ARM_Interface { public: - ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, + ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic_64() override; void SetPC(u64 pc) override; @@ -44,6 +45,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; + void ChangeProcessorID(std::size_t new_core_id) override; void SaveContext(ThreadContext32& ctx) override {} void SaveContext(ThreadContext64& ctx) override; @@ -75,24 +77,4 @@ private: DynarmicExclusiveMonitor& exclusive_monitor; }; -class DynarmicExclusiveMonitor final : public ExclusiveMonitor { -public: - explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); - ~DynarmicExclusiveMonitor() override; - - void SetExclusive(std::size_t core_index, VAddr addr) override; - void ClearExclusive() override; - - bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; - bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; - bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override; - bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override; - bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; - -private: - friend class ARM_Dynarmic_64; - Dynarmic::A64::ExclusiveMonitor monitor; - Core::Memory::Memory& memory; -}; - } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp index 3fdcdebde..54556e0f9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -2,79 +2,132 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <fmt/format.h> +#include "common/logging/log.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/core_timing_util.h" using Callback = Dynarmic::A32::Coprocessor::Callback; using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; +template <> +struct fmt::formatter<Dynarmic::A32::CoprocReg> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) { + return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg)); + } +}; + +namespace Core { + +static u32 dummy_value; + std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, CoprocReg CRn, CoprocReg CRm, unsigned opc2) { + LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn, + CRm, opc2); return {}; } CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) { - // TODO(merry): Privileged CP15 registers - if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { + // CP15_FLUSH_PREFETCH_BUFFER // This is a dummy write, we ignore the value written here. - return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; + return &dummy_value; } if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { switch (opc2) { case 4: + // CP15_DATA_SYNC_BARRIER // This is a dummy write, we ignore the value written here. - return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; + return &dummy_value; case 5: + // CP15_DATA_MEMORY_BARRIER // This is a dummy write, we ignore the value written here. - return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; - default: - return {}; + return &dummy_value; } } if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { - return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + // CP15_THREAD_UPRW + return &uprw; } + LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm, + opc2); return {}; } CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { + LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm); return {}; } CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) { - // TODO(merry): Privileged CP15 registers - if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { switch (opc2) { case 2: - return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + // CP15_THREAD_UPRW + return &uprw; case 3: - return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; - default: - return {}; + // CP15_THREAD_URO + return &uro; } } + LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm, + opc2); return {}; } CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { + if (!two && opc == 0 && CRm == CoprocReg::C14) { + // CNTPCT + const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>( + [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 { + ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg; + return parent.system.CoreTiming().GetClockTicks(); + }); + return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent}; + } + + LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm); return {}; } std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) { + if (option) { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "", + long_transfer ? "l" : "", CRd, *option); + } else { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "", + long_transfer ? "l" : "", CRd); + } return {}; } std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) { + if (option) { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "", + long_transfer ? "l" : "", CRd, *option); + } else { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "", + long_transfer ? "l" : "", CRd); + } return {}; } + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h index 07bcde5f9..7356d252e 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h @@ -10,128 +10,15 @@ #include <dynarmic/A32/coprocessor.h> #include "common/common_types.h" -enum class CP15Register { - // c0 - Information registers - CP15_MAIN_ID, - CP15_CACHE_TYPE, - CP15_TCM_STATUS, - CP15_TLB_TYPE, - CP15_CPU_ID, - CP15_PROCESSOR_FEATURE_0, - CP15_PROCESSOR_FEATURE_1, - CP15_DEBUG_FEATURE_0, - CP15_AUXILIARY_FEATURE_0, - CP15_MEMORY_MODEL_FEATURE_0, - CP15_MEMORY_MODEL_FEATURE_1, - CP15_MEMORY_MODEL_FEATURE_2, - CP15_MEMORY_MODEL_FEATURE_3, - CP15_ISA_FEATURE_0, - CP15_ISA_FEATURE_1, - CP15_ISA_FEATURE_2, - CP15_ISA_FEATURE_3, - CP15_ISA_FEATURE_4, +namespace Core { - // c1 - Control registers - CP15_CONTROL, - CP15_AUXILIARY_CONTROL, - CP15_COPROCESSOR_ACCESS_CONTROL, - - // c2 - Translation table registers - CP15_TRANSLATION_BASE_TABLE_0, - CP15_TRANSLATION_BASE_TABLE_1, - CP15_TRANSLATION_BASE_CONTROL, - CP15_DOMAIN_ACCESS_CONTROL, - CP15_RESERVED, - - // c5 - Fault status registers - CP15_FAULT_STATUS, - CP15_INSTR_FAULT_STATUS, - CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, - CP15_INST_FSR, - - // c6 - Fault Address registers - CP15_FAULT_ADDRESS, - CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, - CP15_WFAR, - CP15_IFAR, - - // c7 - Cache operation registers - CP15_WAIT_FOR_INTERRUPT, - CP15_PHYS_ADDRESS, - CP15_INVALIDATE_INSTR_CACHE, - CP15_INVALIDATE_INSTR_CACHE_USING_MVA, - CP15_INVALIDATE_INSTR_CACHE_USING_INDEX, - CP15_FLUSH_PREFETCH_BUFFER, - CP15_FLUSH_BRANCH_TARGET_CACHE, - CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY, - CP15_INVALIDATE_DATA_CACHE, - CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA, - CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, - CP15_INVALIDATE_DATA_AND_INSTR_CACHE, - CP15_CLEAN_DATA_CACHE, - CP15_CLEAN_DATA_CACHE_LINE_USING_MVA, - CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX, - CP15_DATA_SYNC_BARRIER, - CP15_DATA_MEMORY_BARRIER, - CP15_CLEAN_AND_INVALIDATE_DATA_CACHE, - CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA, - CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, - - // c8 - TLB operations - CP15_INVALIDATE_ITLB, - CP15_INVALIDATE_ITLB_SINGLE_ENTRY, - CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH, - CP15_INVALIDATE_ITLB_ENTRY_ON_MVA, - CP15_INVALIDATE_DTLB, - CP15_INVALIDATE_DTLB_SINGLE_ENTRY, - CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH, - CP15_INVALIDATE_DTLB_ENTRY_ON_MVA, - CP15_INVALIDATE_UTLB, - CP15_INVALIDATE_UTLB_SINGLE_ENTRY, - CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH, - CP15_INVALIDATE_UTLB_ENTRY_ON_MVA, - - // c9 - Data cache lockdown register - CP15_DATA_CACHE_LOCKDOWN, - - // c10 - TLB/Memory map registers - CP15_TLB_LOCKDOWN, - CP15_PRIMARY_REGION_REMAP, - CP15_NORMAL_REGION_REMAP, - - // c13 - Thread related registers - CP15_PID, - CP15_CONTEXT_ID, - CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write - CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W) - CP15_THREAD_PRW, // Thread ID register - Privileged R/W only. - - // c15 - Performance and TLB lockdown registers - CP15_PERFORMANCE_MONITOR_CONTROL, - CP15_CYCLE_COUNTER, - CP15_COUNT_0, - CP15_COUNT_1, - CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY, - CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY, - CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, - CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, - CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, - CP15_TLB_DEBUG_CONTROL, - - // Skyeye defined - CP15_TLB_FAULT_ADDR, - CP15_TLB_FAULT_STATUS, - - // Not an actual register. - // All registers should be defined above this. - CP15_REGISTER_COUNT, -}; +class ARM_Dynarmic_32; class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { public: using CoprocReg = Dynarmic::A32::CoprocReg; - explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; + explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {} std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, CoprocReg CRn, CoprocReg CRm, @@ -147,6 +34,9 @@ public: std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) override; -private: - u32* CP15{}; + ARM_Dynarmic_32& parent; + u32 uprw; + u32 uro; }; + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp new file mode 100644 index 000000000..4e209f6a5 --- /dev/null +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -0,0 +1,76 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cinttypes> +#include <memory> +#include "core/arm/dynarmic/arm_exclusive_monitor.h" +#include "core/memory.h" + +namespace Core { + +DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) + : monitor(core_count), memory{memory} {} + +DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; + +u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) { + return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); }); +} + +u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) { + return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); }); +} + +u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) { + return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); }); +} + +u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) { + return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); }); +} + +u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) { + return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 { + u128 result; + result[0] = memory.Read64(addr); + result[1] = memory.Read64(addr + 8); + return result; + }); +} + +void DynarmicExclusiveMonitor::ClearExclusive() { + monitor.Clear(); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { + return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool { + return memory.WriteExclusive8(vaddr, value, expected); + }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) { + return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool { + return memory.WriteExclusive16(vaddr, value, expected); + }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) { + return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool { + return memory.WriteExclusive32(vaddr, value, expected); + }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) { + return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool { + return memory.WriteExclusive64(vaddr, value, expected); + }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { + return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool { + return memory.WriteExclusive128(vaddr, value, expected); + }); +} + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h new file mode 100644 index 000000000..964f4a55d --- /dev/null +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -0,0 +1,48 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> + +#include <dynarmic/exclusive_monitor.h> + +#include "common/common_types.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/exclusive_monitor.h" + +namespace Core::Memory { +class Memory; +} + +namespace Core { + +class DynarmicExclusiveMonitor final : public ExclusiveMonitor { +public: + explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); + ~DynarmicExclusiveMonitor() override; + + u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override; + u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override; + u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; + u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; + u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; + void ClearExclusive() override; + + bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; + bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; + bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override; + bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override; + bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; + +private: + friend class ARM_Dynarmic_32; + friend class ARM_Dynarmic_64; + Dynarmic::ExclusiveMonitor monitor; + Core::Memory::Memory& memory; +}; + +} // namespace Core diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index b32401e0b..d8cba369d 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/dynarmic/arm_exclusive_monitor.h" #endif #include "core/arm/exclusive_monitor.h" #include "core/memory.h" diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index ccd73b80f..62f6e6023 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -18,7 +18,11 @@ class ExclusiveMonitor { public: virtual ~ExclusiveMonitor(); - virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0; + virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0; + virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0; + virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; + virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; + virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0; virtual void ClearExclusive() = 0; virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index e40e9626a..1df3f3ed1 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -6,6 +6,7 @@ #include <unicorn/arm64.h> #include "common/assert.h" #include "common/microprofile.h" +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" #include "core/core_timing.h" @@ -62,7 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si return false; } -ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} { +ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, + Arch architecture, std::size_t core_index) + : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} { const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64; CHECKED(uc_open(arch, UC_MODE_ARM, &uc)); @@ -156,12 +159,20 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) { CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value)); } +void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) { + core_index = new_core_id; +} + void ARM_Unicorn::Run() { if (GDBStub::IsServerEnabled()) { ExecuteInstructions(std::max(4000000U, 0U)); } else { - ExecuteInstructions( - std::max(std::size_t(system.CoreTiming().GetDowncount()), std::size_t{0})); + while (true) { + if (interrupt_handlers[core_index].IsInterrupted()) { + return; + } + ExecuteInstructions(10); + } } } @@ -183,8 +194,6 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data())); CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size())); - - system.CoreTiming().AddTicks(num_instructions); if (GDBStub::IsServerEnabled()) { if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 725c65085..810aff311 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -20,7 +20,8 @@ public: AArch64, // 64-bit ARM }; - explicit ARM_Unicorn(System& system, Arch architecture); + explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, + Arch architecture, std::size_t core_index); ~ARM_Unicorn() override; void SetPC(u64 pc) override; @@ -35,6 +36,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; + void ChangeProcessorID(std::size_t new_core_id) override; void PrepareReschedule() override; void ClearExclusiveState() override; void ExecuteInstructions(std::size_t num_instructions); @@ -55,6 +57,7 @@ private: uc_engine* uc{}; GDBStub::BreakpointAddress last_bkpt{}; bool last_bkpt_hit = false; + std::size_t core_index; }; } // namespace Core diff --git a/src/core/core.cpp b/src/core/core.cpp index f9f8a3000..69a1aa0a5 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -8,10 +8,10 @@ #include "common/file_util.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/cpu_manager.h" #include "core/device_memory.h" @@ -51,6 +51,11 @@ #include "video_core/renderer_base.h" #include "video_core/video_core.h" +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU0, "ARM JIT", "Dynarmic CPU 0", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU1, "ARM JIT", "Dynarmic CPU 1", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU2, "ARM JIT", "Dynarmic CPU 2", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU3, "ARM JIT", "Dynarmic CPU 3", MP_RGB(255, 64, 64)); + namespace Core { namespace { @@ -117,23 +122,22 @@ struct System::Impl { : kernel{system}, fs_controller{system}, memory{system}, cpu_manager{system}, reporter{system}, applet_manager{system} {} - CoreManager& CurrentCoreManager() { - return cpu_manager.GetCurrentCoreManager(); - } + ResultStatus Run() { + status = ResultStatus::Success; - Kernel::PhysicalCore& CurrentPhysicalCore() { - const auto index = cpu_manager.GetActiveCoreIndex(); - return kernel.PhysicalCore(index); - } + kernel.Suspend(false); + core_timing.SyncPause(false); + cpu_manager.Pause(false); - Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) { - return kernel.PhysicalCore(index); + return status; } - ResultStatus RunLoop(bool tight_loop) { + ResultStatus Pause() { status = ResultStatus::Success; - cpu_manager.RunLoop(tight_loop); + core_timing.SyncPause(true); + kernel.Suspend(true); + cpu_manager.Pause(true); return status; } @@ -143,14 +147,22 @@ struct System::Impl { device_memory = std::make_unique<Core::DeviceMemory>(system); - core_timing.Initialize(); + is_multicore = Settings::values.use_multi_core.GetValue(); + is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation.GetValue(); + + kernel.SetMulticore(is_multicore); + cpu_manager.SetMulticore(is_multicore); + cpu_manager.SetAsyncGpu(is_async_gpu); + core_timing.SetMulticore(is_multicore); + + core_timing.Initialize([&system]() { system.RegisterHostThread(); }); kernel.Initialize(); cpu_manager.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch()); Settings::values.custom_rtc_differential = - Settings::values.custom_rtc.value_or(current_time) - current_time; + Settings::values.custom_rtc.GetValue().value_or(current_time) - current_time; // Create a default fs if one doesn't already exist. if (virtual_filesystem == nullptr) @@ -180,6 +192,11 @@ struct System::Impl { is_powered_on = true; exit_lock = false; + microprofile_dynarmic[0] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU0); + microprofile_dynarmic[1] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU1); + microprofile_dynarmic[2] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU2); + microprofile_dynarmic[3] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU3); + LOG_DEBUG(Core, "Initialized OK"); return ResultStatus::Success; @@ -277,8 +294,6 @@ struct System::Impl { service_manager.reset(); cheat_engine.reset(); telemetry_session.reset(); - perf_stats.reset(); - gpu_core.reset(); device_memory.reset(); // Close all CPU/threading state @@ -290,6 +305,8 @@ struct System::Impl { // Close app loader app_loader.reset(); + gpu_core.reset(); + perf_stats.reset(); // Clear all applets applet_manager.ClearAll(); @@ -382,25 +399,35 @@ struct System::Impl { std::unique_ptr<Core::PerfStats> perf_stats; Core::FrameLimiter frame_limiter; + + bool is_multicore{}; + bool is_async_gpu{}; + + std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; + std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_dynarmic{}; }; System::System() : impl{std::make_unique<Impl>(*this)} {} System::~System() = default; -CoreManager& System::CurrentCoreManager() { - return impl->CurrentCoreManager(); +CpuManager& System::GetCpuManager() { + return impl->cpu_manager; +} + +const CpuManager& System::GetCpuManager() const { + return impl->cpu_manager; } -const CoreManager& System::CurrentCoreManager() const { - return impl->CurrentCoreManager(); +System::ResultStatus System::Run() { + return impl->Run(); } -System::ResultStatus System::RunLoop(bool tight_loop) { - return impl->RunLoop(tight_loop); +System::ResultStatus System::Pause() { + return impl->Pause(); } System::ResultStatus System::SingleStep() { - return RunLoop(false); + return ResultStatus::Success; } void System::InvalidateCpuInstructionCaches() { @@ -416,7 +443,7 @@ bool System::IsPoweredOn() const { } void System::PrepareReschedule() { - impl->CurrentPhysicalCore().Stop(); + // Deprecated, does nothing, kept for backward compatibility. } void System::PrepareReschedule(const u32 core_index) { @@ -436,31 +463,41 @@ const TelemetrySession& System::TelemetrySession() const { } ARM_Interface& System::CurrentArmInterface() { - return impl->CurrentPhysicalCore().ArmInterface(); + return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface(); } const ARM_Interface& System::CurrentArmInterface() const { - return impl->CurrentPhysicalCore().ArmInterface(); + return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface(); } std::size_t System::CurrentCoreIndex() const { - return impl->cpu_manager.GetActiveCoreIndex(); + std::size_t core = impl->kernel.GetCurrentHostThreadID(); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + return core; } Kernel::Scheduler& System::CurrentScheduler() { - return impl->CurrentPhysicalCore().Scheduler(); + return impl->kernel.CurrentScheduler(); } const Kernel::Scheduler& System::CurrentScheduler() const { - return impl->CurrentPhysicalCore().Scheduler(); + return impl->kernel.CurrentScheduler(); +} + +Kernel::PhysicalCore& System::CurrentPhysicalCore() { + return impl->kernel.CurrentPhysicalCore(); +} + +const Kernel::PhysicalCore& System::CurrentPhysicalCore() const { + return impl->kernel.CurrentPhysicalCore(); } Kernel::Scheduler& System::Scheduler(std::size_t core_index) { - return impl->GetPhysicalCore(core_index).Scheduler(); + return impl->kernel.Scheduler(core_index); } const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { - return impl->GetPhysicalCore(core_index).Scheduler(); + return impl->kernel.Scheduler(core_index); } /// Gets the global scheduler @@ -490,20 +527,15 @@ const Kernel::Process* System::CurrentProcess() const { } ARM_Interface& System::ArmInterface(std::size_t core_index) { - return impl->GetPhysicalCore(core_index).ArmInterface(); + auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread(); + ASSERT(thread && !thread->IsHLEThread()); + return thread->ArmInterface(); } const ARM_Interface& System::ArmInterface(std::size_t core_index) const { - return impl->GetPhysicalCore(core_index).ArmInterface(); -} - -CoreManager& System::GetCoreManager(std::size_t core_index) { - return impl->cpu_manager.GetCoreManager(core_index); -} - -const CoreManager& System::GetCoreManager(std::size_t core_index) const { - ASSERT(core_index < NUM_CPU_CORES); - return impl->cpu_manager.GetCoreManager(core_index); + auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread(); + ASSERT(thread && !thread->IsHLEThread()); + return thread->ArmInterface(); } ExclusiveMonitor& System::Monitor() { @@ -722,4 +754,18 @@ void System::RegisterHostThread() { impl->kernel.RegisterHostThread(); } +void System::EnterDynarmicProfile() { + std::size_t core = impl->kernel.GetCurrentHostThreadID(); + impl->dynarmic_ticks[core] = MicroProfileEnter(impl->microprofile_dynarmic[core]); +} + +void System::ExitDynarmicProfile() { + std::size_t core = impl->kernel.GetCurrentHostThreadID(); + MicroProfileLeave(impl->microprofile_dynarmic[core], impl->dynarmic_ticks[core]); +} + +bool System::IsMulticore() const { + return impl->is_multicore; +} + } // namespace Core diff --git a/src/core/core.h b/src/core/core.h index acc53d6a1..5c6cfbffe 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -27,6 +27,7 @@ class VfsFilesystem; namespace Kernel { class GlobalScheduler; class KernelCore; +class PhysicalCore; class Process; class Scheduler; } // namespace Kernel @@ -90,7 +91,7 @@ class InterruptManager; namespace Core { class ARM_Interface; -class CoreManager; +class CpuManager; class DeviceMemory; class ExclusiveMonitor; class FrameLimiter; @@ -136,16 +137,16 @@ public: }; /** - * Run the core CPU loop - * This function runs the core for the specified number of CPU instructions before trying to - * update hardware. This is much faster than SingleStep (and should be equivalent), as the CPU - * is not required to do a full dispatch with each instruction. NOTE: the number of instructions - * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware - * update is requested (e.g. on a thread switch). - * @param tight_loop If false, the CPU single-steps. - * @return Result status, indicating whether or not the operation succeeded. + * Run the OS and Application + * This function will start emulation and run the relevant devices + */ + ResultStatus Run(); + + /** + * Pause the OS and Application + * This function will pause emulation and stop the relevant devices */ - ResultStatus RunLoop(bool tight_loop = true); + ResultStatus Pause(); /** * Step the CPU one instruction @@ -209,17 +210,21 @@ public: /// Gets the scheduler for the CPU core that is currently running const Kernel::Scheduler& CurrentScheduler() const; + /// Gets the physical core for the CPU core that is currently running + Kernel::PhysicalCore& CurrentPhysicalCore(); + + /// Gets the physical core for the CPU core that is currently running + const Kernel::PhysicalCore& CurrentPhysicalCore() const; + /// Gets a reference to an ARM interface for the CPU core with the specified index ARM_Interface& ArmInterface(std::size_t core_index); /// Gets a const reference to an ARM interface from the CPU core with the specified index const ARM_Interface& ArmInterface(std::size_t core_index) const; - /// Gets a CPU interface to the CPU core with the specified index - CoreManager& GetCoreManager(std::size_t core_index); + CpuManager& GetCpuManager(); - /// Gets a CPU interface to the CPU core with the specified index - const CoreManager& GetCoreManager(std::size_t core_index) const; + const CpuManager& GetCpuManager() const; /// Gets a reference to the exclusive monitor ExclusiveMonitor& Monitor(); @@ -370,14 +375,17 @@ public: /// Register a host thread as an auxiliary thread. void RegisterHostThread(); -private: - System(); + /// Enter Dynarmic Microprofile + void EnterDynarmicProfile(); + + /// Exit Dynarmic Microprofile + void ExitDynarmicProfile(); - /// Returns the currently running CPU core - CoreManager& CurrentCoreManager(); + /// Tells if system is running on multicore. + bool IsMulticore() const; - /// Returns the currently running CPU core - const CoreManager& CurrentCoreManager() const; +private: + System(); /** * Initialize the emulated system. diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp deleted file mode 100644 index b6b797c80..000000000 --- a/src/core/core_manager.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <condition_variable> -#include <mutex> - -#include "common/logging/log.h" -#include "core/arm/exclusive_monitor.h" -#include "core/arm/unicorn/arm_unicorn.h" -#include "core/core.h" -#include "core/core_manager.h" -#include "core/core_timing.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/physical_core.h" -#include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/lock.h" -#include "core/settings.h" - -namespace Core { - -CoreManager::CoreManager(System& system, std::size_t core_index) - : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore( - core_index)}, - core_timing{system.CoreTiming()}, core_index{core_index} {} - -CoreManager::~CoreManager() = default; - -void CoreManager::RunLoop(bool tight_loop) { - Reschedule(); - - // If we don't have a currently active thread then don't execute instructions, - // instead advance to the next event and try to yield to the next thread - if (Kernel::GetCurrentThread() == nullptr) { - LOG_TRACE(Core, "Core-{} idling", core_index); - core_timing.Idle(); - } else { - if (tight_loop) { - physical_core.Run(); - } else { - physical_core.Step(); - } - } - core_timing.Advance(); - - Reschedule(); -} - -void CoreManager::SingleStep() { - return RunLoop(false); -} - -void CoreManager::PrepareReschedule() { - physical_core.Stop(); -} - -void CoreManager::Reschedule() { - // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock(HLE::g_hle_lock); - - global_scheduler.SelectThread(core_index); - - physical_core.Scheduler().TryDoContextSwitch(); -} - -} // namespace Core diff --git a/src/core/core_manager.h b/src/core/core_manager.h deleted file mode 100644 index d525de00a..000000000 --- a/src/core/core_manager.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <atomic> -#include <cstddef> -#include <memory> -#include "common/common_types.h" - -namespace Kernel { -class GlobalScheduler; -class PhysicalCore; -} // namespace Kernel - -namespace Core { -class System; -} - -namespace Core::Timing { -class CoreTiming; -} - -namespace Core::Memory { -class Memory; -} - -namespace Core { - -constexpr unsigned NUM_CPU_CORES{4}; - -class CoreManager { -public: - CoreManager(System& system, std::size_t core_index); - ~CoreManager(); - - void RunLoop(bool tight_loop = true); - - void SingleStep(); - - void PrepareReschedule(); - - bool IsMainCore() const { - return core_index == 0; - } - - std::size_t CoreIndex() const { - return core_index; - } - -private: - void Reschedule(); - - Kernel::GlobalScheduler& global_scheduler; - Kernel::PhysicalCore& physical_core; - Timing::CoreTiming& core_timing; - - std::atomic<bool> reschedule_pending = false; - std::size_t core_index; -}; - -} // namespace Core diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 46d4178c4..a63e60461 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -1,29 +1,27 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "core/core_timing.h" - #include <algorithm> #include <mutex> #include <string> #include <tuple> #include "common/assert.h" -#include "common/thread.h" +#include "common/microprofile.h" +#include "core/core_timing.h" #include "core/core_timing_util.h" -#include "core/hardware_properties.h" namespace Core::Timing { -constexpr int MAX_SLICE_LENGTH = 10000; +constexpr u64 MAX_SLICE_LENGTH = 4000; std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) { return std::make_shared<EventType>(std::move(callback), std::move(name)); } struct CoreTiming::Event { - s64 time; + u64 time; u64 fifo_order; u64 userdata; std::weak_ptr<EventType> type; @@ -39,51 +37,90 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() = default; -CoreTiming::~CoreTiming() = default; +CoreTiming::CoreTiming() { + clock = + Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ); +} -void CoreTiming::Initialize() { - downcounts.fill(MAX_SLICE_LENGTH); - time_slice.fill(MAX_SLICE_LENGTH); - slice_length = MAX_SLICE_LENGTH; - global_timer = 0; - idled_cycles = 0; - current_context = 0; +CoreTiming::~CoreTiming() = default; - // The time between CoreTiming being initialized and the first call to Advance() is considered - // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before - // executing the first cycle of each slice to prepare the slice length and downcount for - // that slice. - is_global_timer_sane = true; +void CoreTiming::ThreadEntry(CoreTiming& instance) { + constexpr char name[] = "yuzu:HostTiming"; + MicroProfileOnThreadCreate(name); + Common::SetCurrentThreadName(name); + Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); + instance.on_thread_init(); + instance.ThreadLoop(); +} +void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) { + on_thread_init = std::move(on_thread_init_); event_fifo_id = 0; - + shutting_down = false; + ticks = 0; const auto empty_timed_callback = [](u64, s64) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); + if (is_multicore) { + timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); + } } void CoreTiming::Shutdown() { + paused = true; + shutting_down = true; + pause_event.Set(); + event.Set(); + if (timer_thread) { + timer_thread->join(); + } ClearPendingEvents(); + timer_thread.reset(); + has_started = false; } -void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type, - u64 userdata) { - std::lock_guard guard{inner_mutex}; - const s64 timeout = GetTicks() + cycles_into_future; +void CoreTiming::Pause(bool is_paused) { + paused = is_paused; + pause_event.Set(); +} - // If this event needs to be scheduled before the next advance(), force one early - if (!is_global_timer_sane) { - ForceExceptionCheck(cycles_into_future); +void CoreTiming::SyncPause(bool is_paused) { + if (is_paused == paused && paused_set == paused) { + return; + } + Pause(is_paused); + if (timer_thread) { + if (!is_paused) { + pause_event.Set(); + } + event.Set(); + while (paused_set != is_paused) + ; } +} - event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); +bool CoreTiming::IsRunning() const { + return !paused_set; +} - std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); +bool CoreTiming::HasPendingEvents() const { + return !(wait_set && event_queue.empty()); } -void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) { - std::lock_guard guard{inner_mutex}; +void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type, + u64 userdata) { + { + std::scoped_lock scope{basic_lock}; + const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future); + + event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); + std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + } + event.Set(); +} + +void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) { + std::scoped_lock scope{basic_lock}; const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get() && e.userdata == userdata; }); @@ -95,21 +132,39 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u } } -u64 CoreTiming::GetTicks() const { - u64 ticks = static_cast<u64>(global_timer); - if (!is_global_timer_sane) { - ticks += accumulated_ticks; +void CoreTiming::AddTicks(u64 ticks) { + this->ticks += ticks; + downcount -= ticks; +} + +void CoreTiming::Idle() { + if (!event_queue.empty()) { + const u64 next_event_time = event_queue.front().time; + const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U; + if (next_ticks > ticks) { + ticks = next_ticks; + } + return; } - return ticks; + ticks += 1000U; } -u64 CoreTiming::GetIdleTicks() const { - return static_cast<u64>(idled_cycles); +void CoreTiming::ResetTicks() { + downcount = MAX_SLICE_LENGTH; } -void CoreTiming::AddTicks(u64 ticks) { - accumulated_ticks += ticks; - downcounts[current_context] -= static_cast<s64>(ticks); +u64 CoreTiming::GetCPUTicks() const { + if (is_multicore) { + return clock->GetCPUCycles(); + } + return ticks; +} + +u64 CoreTiming::GetClockTicks() const { + if (is_multicore) { + return clock->GetClockCycles(); + } + return CpuCyclesToClockCycles(ticks); } void CoreTiming::ClearPendingEvents() { @@ -117,7 +172,7 @@ void CoreTiming::ClearPendingEvents() { } void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { - std::lock_guard guard{inner_mutex}; + std::scoped_lock lock{basic_lock}; const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get(); @@ -130,97 +185,68 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { } } -void CoreTiming::ForceExceptionCheck(s64 cycles) { - cycles = std::max<s64>(0, cycles); - if (downcounts[current_context] <= cycles) { - return; - } - - // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int - // here. Account for cycles already executed by adjusting the g.slice_length - downcounts[current_context] = static_cast<int>(cycles); -} - -std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const { - const u64 original_context = current_context; - u64 next_context = (original_context + 1) % num_cpu_cores; - while (next_context != original_context) { - if (time_slice[next_context] >= needed_ticks) { - return {next_context}; - } else if (time_slice[next_context] >= 0) { - return std::nullopt; - } - next_context = (next_context + 1) % num_cpu_cores; - } - return std::nullopt; -} - -void CoreTiming::Advance() { - std::unique_lock<std::mutex> guard(inner_mutex); - - const u64 cycles_executed = accumulated_ticks; - time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks); - global_timer += cycles_executed; - - is_global_timer_sane = true; +std::optional<s64> CoreTiming::Advance() { + std::scoped_lock lock{advance_lock, basic_lock}; + global_timer = GetGlobalTimeNs().count(); while (!event_queue.empty() && event_queue.front().time <= global_timer) { Event evt = std::move(event_queue.front()); std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); event_queue.pop_back(); - inner_mutex.unlock(); + basic_lock.unlock(); if (auto event_type{evt.type.lock()}) { event_type->callback(evt.userdata, global_timer - evt.time); } - inner_mutex.lock(); + basic_lock.lock(); + global_timer = GetGlobalTimeNs().count(); } - is_global_timer_sane = false; - - // Still events left (scheduled in the future) if (!event_queue.empty()) { - const s64 needed_ticks = - std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); - const auto next_core = NextAvailableCore(needed_ticks); - if (next_core) { - downcounts[*next_core] = needed_ticks; - } + const s64 next_time = event_queue.front().time - global_timer; + return next_time; + } else { + return std::nullopt; } - - accumulated_ticks = 0; - - downcounts[current_context] = time_slice[current_context]; } -void CoreTiming::ResetRun() { - downcounts.fill(MAX_SLICE_LENGTH); - time_slice.fill(MAX_SLICE_LENGTH); - current_context = 0; - // Still events left (scheduled in the future) - if (!event_queue.empty()) { - const s64 needed_ticks = - std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); - downcounts[current_context] = needed_ticks; +void CoreTiming::ThreadLoop() { + has_started = true; + while (!shutting_down) { + while (!paused) { + paused_set = false; + const auto next_time = Advance(); + if (next_time) { + if (*next_time > 0) { + std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); + event.WaitFor(next_time_ns); + } + } else { + wait_set = true; + event.Wait(); + } + wait_set = false; + } + paused_set = true; + clock->Pause(true); + pause_event.Wait(); + clock->Pause(false); } - - is_global_timer_sane = false; - accumulated_ticks = 0; } -void CoreTiming::Idle() { - accumulated_ticks += downcounts[current_context]; - idled_cycles += downcounts[current_context]; - downcounts[current_context] = 0; +std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { + if (is_multicore) { + return clock->GetTimeNS(); + } + return CyclesToNs(ticks); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE}; -} - -s64 CoreTiming::GetDowncount() const { - return downcounts[current_context]; + if (is_multicore) { + return clock->GetTimeUS(); + } + return CyclesToUs(ticks); } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index d50f4eb8a..72faaab64 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -1,19 +1,25 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once +#include <atomic> #include <chrono> #include <functional> #include <memory> #include <mutex> #include <optional> #include <string> +#include <thread> #include <vector> #include "common/common_types.h" +#include "common/spin_lock.h" +#include "common/thread.h" #include "common/threadsafe_queue.h" +#include "common/wall_clock.h" +#include "core/hardware_properties.h" namespace Core::Timing { @@ -56,16 +62,40 @@ public: /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is /// required to end slice - 1 and start slice 0 before the first cycle of code is executed. - void Initialize(); + void Initialize(std::function<void(void)>&& on_thread_init_); /// Tears down all timing related functionality. void Shutdown(); - /// After the first Advance, the slice lengths and the downcount will be reduced whenever an - /// event is scheduled earlier than the current values. - /// - /// Scheduling from a callback will not update the downcount until the Advance() completes. - void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type, + /// Sets if emulation is multicore or single core, must be set before Initialize + void SetMulticore(bool is_multicore) { + this->is_multicore = is_multicore; + } + + /// Check if it's using host timing. + bool IsHostTiming() const { + return is_multicore; + } + + /// Pauses/Unpauses the execution of the timer thread. + void Pause(bool is_paused); + + /// Pauses/Unpauses the execution of the timer thread and waits until paused. + void SyncPause(bool is_paused); + + /// Checks if core timing is running. + bool IsRunning() const; + + /// Checks if the timer thread has started. + bool HasStarted() const { + return has_started; + } + + /// Checks if there are any pending time events. + bool HasPendingEvents() const; + + /// Schedules an event in core timing + void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type, u64 userdata = 0); void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata); @@ -73,41 +103,30 @@ public: /// We only permit one event of each type in the queue at a time. void RemoveEvent(const std::shared_ptr<EventType>& event_type); - void ForceExceptionCheck(s64 cycles); - - /// This should only be called from the emu thread, if you are calling it any other thread, - /// you are doing something evil - u64 GetTicks() const; - - u64 GetIdleTicks() const; - void AddTicks(u64 ticks); - /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends - /// the previous timing slice and begins the next one, you must Advance from the previous - /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an - /// Advance() is required to initialize the slice length before the first cycle of emulated - /// instructions is executed. - void Advance(); + void ResetTicks(); - /// Pretend that the main CPU has executed enough cycles to reach the next event. void Idle(); - std::chrono::microseconds GetGlobalTimeUs() const; + s64 GetDowncount() const { + return downcount; + } - void ResetRun(); + /// Returns current time in emulated CPU cycles + u64 GetCPUTicks() const; - s64 GetDowncount() const; + /// Returns current time in emulated in Clock cycles + u64 GetClockTicks() const; - void SwitchContext(u64 new_context) { - current_context = new_context; - } + /// Returns current time in microseconds. + std::chrono::microseconds GetGlobalTimeUs() const; - bool CanCurrentContextRun() const { - return time_slice[current_context] > 0; - } + /// Returns current time in nanoseconds. + std::chrono::nanoseconds GetGlobalTimeNs() const; - std::optional<u64> NextAvailableCore(const s64 needed_ticks) const; + /// Checks for events manually and returns time in nanoseconds for next event, threadsafe. + std::optional<s64> Advance(); private: struct Event; @@ -115,21 +134,14 @@ private: /// Clear all pending events. This should ONLY be done on exit. void ClearPendingEvents(); - static constexpr u64 num_cpu_cores = 4; + static void ThreadEntry(CoreTiming& instance); + void ThreadLoop(); - s64 global_timer = 0; - s64 idled_cycles = 0; - s64 slice_length = 0; - u64 accumulated_ticks = 0; - std::array<s64, num_cpu_cores> downcounts{}; - // Slice of time assigned to each core per run. - std::array<s64, num_cpu_cores> time_slice{}; - u64 current_context = 0; + std::unique_ptr<Common::WallClock> clock; - // Are we in a function that has been called from Advance() - // If events are scheduled from a function that gets called from Advance(), - // don't change slice_length and downcount. - bool is_global_timer_sane = false; + u64 global_timer = 0; + + std::chrono::nanoseconds start_point; // The queue is a min-heap using std::make_heap/push_heap/pop_heap. // We don't use std::priority_queue because we need to be able to serialize, unserialize and @@ -139,8 +151,23 @@ private: u64 event_fifo_id = 0; std::shared_ptr<EventType> ev_lost; - - std::mutex inner_mutex; + Common::Event event{}; + Common::Event pause_event{}; + Common::SpinLock basic_lock{}; + Common::SpinLock advance_lock{}; + std::unique_ptr<std::thread> timer_thread; + std::atomic<bool> paused{}; + std::atomic<bool> paused_set{}; + std::atomic<bool> wait_set{}; + std::atomic<bool> shutting_down{}; + std::atomic<bool> has_started{}; + std::function<void(void)> on_thread_init{}; + + bool is_multicore{}; + + /// Cycle timing + u64 ticks{}; + s64 downcount{}; }; /// Creates a core timing event with the given name and callback. diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index de50d3b14..aefc63663 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -38,15 +38,23 @@ s64 usToCycles(std::chrono::microseconds us) { } s64 nsToCycles(std::chrono::nanoseconds ns) { - if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) { - LOG_ERROR(Core_Timing, "Integer overflow, use max value"); - return std::numeric_limits<s64>::max(); - } - if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) { - LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000); - } - return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; + const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE); + return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first; +} + +u64 msToClockCycles(std::chrono::milliseconds ns) { + const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); + return Common::Divide128On32(temp, 1000).first; +} + +u64 usToClockCycles(std::chrono::microseconds ns) { + const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); + return Common::Divide128On32(temp, 1000000).first; +} + +u64 nsToClockCycles(std::chrono::nanoseconds ns) { + const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); + return Common::Divide128On32(temp, 1000000000).first; } u64 CpuCyclesToClockCycles(u64 ticks) { @@ -54,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) { return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; } +std::chrono::milliseconds CyclesToMs(s64 cycles) { + const u128 temporal = Common::Multiply64Into128(cycles, 1000); + u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; + return std::chrono::milliseconds(ms); +} + +std::chrono::nanoseconds CyclesToNs(s64 cycles) { + const u128 temporal = Common::Multiply64Into128(cycles, 1000000000); + u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; + return std::chrono::nanoseconds(ns); +} + +std::chrono::microseconds CyclesToUs(s64 cycles) { + const u128 temporal = Common::Multiply64Into128(cycles, 1000000); + u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; + return std::chrono::microseconds(us); +} + } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index addc72b19..2ed979e14 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -13,18 +13,12 @@ namespace Core::Timing { s64 msToCycles(std::chrono::milliseconds ms); s64 usToCycles(std::chrono::microseconds us); s64 nsToCycles(std::chrono::nanoseconds ns); - -inline std::chrono::milliseconds CyclesToMs(s64 cycles) { - return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE); -} - -inline std::chrono::nanoseconds CyclesToNs(s64 cycles) { - return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE); -} - -inline std::chrono::microseconds CyclesToUs(s64 cycles) { - return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE); -} +u64 msToClockCycles(std::chrono::milliseconds ns); +u64 usToClockCycles(std::chrono::microseconds ns); +u64 nsToClockCycles(std::chrono::nanoseconds ns); +std::chrono::milliseconds CyclesToMs(s64 cycles); +std::chrono::nanoseconds CyclesToNs(s64 cycles); +std::chrono::microseconds CyclesToUs(s64 cycles); u64 CpuCyclesToClockCycles(u64 ticks); diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 70ddbdcca..32afcf3ae 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -2,80 +2,372 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/fiber.h" +#include "common/microprofile.h" +#include "common/thread.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/cpu_manager.h" #include "core/gdbstub/gdbstub.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" +#include "video_core/gpu.h" namespace Core { CpuManager::CpuManager(System& system) : system{system} {} CpuManager::~CpuManager() = default; +void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) { + cpu_manager.RunThread(core); +} + void CpuManager::Initialize() { - for (std::size_t index = 0; index < core_managers.size(); ++index) { - core_managers[index] = std::make_unique<CoreManager>(system, index); + running_mode = true; + if (is_multicore) { + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].host_thread = + std::make_unique<std::thread>(ThreadStart, std::ref(*this), core); + } + } else { + core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0); } } void CpuManager::Shutdown() { - for (auto& cpu_core : core_managers) { - cpu_core.reset(); + running_mode = false; + Pause(false); + if (is_multicore) { + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].host_thread->join(); + core_data[core].host_thread.reset(); + } + } else { + core_data[0].host_thread->join(); + core_data[0].host_thread.reset(); } } -CoreManager& CpuManager::GetCoreManager(std::size_t index) { - return *core_managers.at(index); +std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() { + return std::function<void(void*)>(GuestThreadFunction); } -const CoreManager& CpuManager::GetCoreManager(std::size_t index) const { - return *core_managers.at(index); +std::function<void(void*)> CpuManager::GetIdleThreadStartFunc() { + return std::function<void(void*)>(IdleThreadFunction); } -CoreManager& CpuManager::GetCurrentCoreManager() { - // Otherwise, use single-threaded mode active_core variable - return *core_managers[active_core]; +std::function<void(void*)> CpuManager::GetSuspendThreadStartFunc() { + return std::function<void(void*)>(SuspendThreadFunction); } -const CoreManager& CpuManager::GetCurrentCoreManager() const { - // Otherwise, use single-threaded mode active_core variable - return *core_managers[active_core]; +void CpuManager::GuestThreadFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); + if (cpu_manager->is_multicore) { + cpu_manager->MultiCoreRunGuestThread(); + } else { + cpu_manager->SingleCoreRunGuestThread(); + } } -void CpuManager::RunLoop(bool tight_loop) { - if (GDBStub::IsServerEnabled()) { - GDBStub::HandlePacket(); +void CpuManager::GuestRewindFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); + if (cpu_manager->is_multicore) { + cpu_manager->MultiCoreRunGuestLoop(); + } else { + cpu_manager->SingleCoreRunGuestLoop(); + } +} - // If the loop is halted and we want to step, use a tiny (1) number of instructions to - // execute. Otherwise, get out of the loop function. - if (GDBStub::GetCpuHaltFlag()) { - if (GDBStub::GetCpuStepFlag()) { - tight_loop = false; - } else { - return; +void CpuManager::IdleThreadFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); + if (cpu_manager->is_multicore) { + cpu_manager->MultiCoreRunIdleThread(); + } else { + cpu_manager->SingleCoreRunIdleThread(); + } +} + +void CpuManager::SuspendThreadFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); + if (cpu_manager->is_multicore) { + cpu_manager->MultiCoreRunSuspendThread(); + } else { + cpu_manager->SingleCoreRunSuspendThread(); + } +} + +void* CpuManager::GetStartFuncParamater() { + return static_cast<void*>(this); +} + +/////////////////////////////////////////////////////////////////////////////// +/// MultiCore /// +/////////////////////////////////////////////////////////////////////////////// + +void CpuManager::MultiCoreRunGuestThread() { + auto& kernel = system.Kernel(); + { + auto& sched = kernel.CurrentScheduler(); + sched.OnThreadStart(); + } + MultiCoreRunGuestLoop(); +} + +void CpuManager::MultiCoreRunGuestLoop() { + auto& kernel = system.Kernel(); + auto* thread = kernel.CurrentScheduler().GetCurrentThread(); + while (true) { + auto* physical_core = &kernel.CurrentPhysicalCore(); + auto& arm_interface = thread->ArmInterface(); + system.EnterDynarmicProfile(); + while (!physical_core->IsInterrupted()) { + arm_interface.Run(); + physical_core = &kernel.CurrentPhysicalCore(); + } + system.ExitDynarmicProfile(); + arm_interface.ClearExclusiveState(); + auto& scheduler = kernel.CurrentScheduler(); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::MultiCoreRunIdleThread() { + auto& kernel = system.Kernel(); + while (true) { + auto& physical_core = kernel.CurrentPhysicalCore(); + physical_core.Idle(); + auto& scheduler = kernel.CurrentScheduler(); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::MultiCoreRunSuspendThread() { + auto& kernel = system.Kernel(); + { + auto& sched = kernel.CurrentScheduler(); + sched.OnThreadStart(); + } + while (true) { + auto core = kernel.GetCurrentHostThreadID(); + auto& scheduler = kernel.CurrentScheduler(); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context); + ASSERT(scheduler.ContextSwitchPending()); + ASSERT(core == kernel.GetCurrentHostThreadID()); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::MultiCorePause(bool paused) { + if (!paused) { + bool all_not_barrier = false; + while (!all_not_barrier) { + all_not_barrier = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + all_not_barrier &= + !core_data[core].is_running.load() && core_data[core].initialized.load(); + } + } + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].enter_barrier->Set(); + } + if (paused_state.load()) { + bool all_barrier = false; + while (!all_barrier) { + all_barrier = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + all_barrier &= + core_data[core].is_paused.load() && core_data[core].initialized.load(); + } + } + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].exit_barrier->Set(); + } + } + } else { + /// Wait until all cores are paused. + bool all_barrier = false; + while (!all_barrier) { + all_barrier = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + all_barrier &= + core_data[core].is_paused.load() && core_data[core].initialized.load(); } } + /// Don't release the barrier } + paused_state = paused; +} + +/////////////////////////////////////////////////////////////////////////////// +/// SingleCore /// +/////////////////////////////////////////////////////////////////////////////// - auto& core_timing = system.CoreTiming(); - core_timing.ResetRun(); - bool keep_running{}; - do { - keep_running = false; - for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - core_managers[active_core]->RunLoop(tight_loop); +void CpuManager::SingleCoreRunGuestThread() { + auto& kernel = system.Kernel(); + { + auto& sched = kernel.CurrentScheduler(); + sched.OnThreadStart(); + } + SingleCoreRunGuestLoop(); +} + +void CpuManager::SingleCoreRunGuestLoop() { + auto& kernel = system.Kernel(); + auto* thread = kernel.CurrentScheduler().GetCurrentThread(); + while (true) { + auto* physical_core = &kernel.CurrentPhysicalCore(); + auto& arm_interface = thread->ArmInterface(); + system.EnterDynarmicProfile(); + if (!physical_core->IsInterrupted()) { + arm_interface.Run(); + physical_core = &kernel.CurrentPhysicalCore(); + } + system.ExitDynarmicProfile(); + thread->SetPhantomMode(true); + system.CoreTiming().Advance(); + thread->SetPhantomMode(false); + arm_interface.ClearExclusiveState(); + PreemptSingleCore(); + auto& scheduler = kernel.Scheduler(current_core); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::SingleCoreRunIdleThread() { + auto& kernel = system.Kernel(); + while (true) { + auto& physical_core = kernel.CurrentPhysicalCore(); + PreemptSingleCore(false); + system.CoreTiming().AddTicks(1000U); + idle_count++; + auto& scheduler = physical_core.Scheduler(); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::SingleCoreRunSuspendThread() { + auto& kernel = system.Kernel(); + { + auto& sched = kernel.CurrentScheduler(); + sched.OnThreadStart(); + } + while (true) { + auto core = kernel.GetCurrentHostThreadID(); + auto& scheduler = kernel.CurrentScheduler(); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context); + ASSERT(scheduler.ContextSwitchPending()); + ASSERT(core == kernel.GetCurrentHostThreadID()); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::PreemptSingleCore(bool from_running_enviroment) { + std::size_t old_core = current_core; + auto& scheduler = system.Kernel().Scheduler(old_core); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + if (idle_count >= 4 || from_running_enviroment) { + if (!from_running_enviroment) { + system.CoreTiming().Idle(); + idle_count = 0; + } + current_thread->SetPhantomMode(true); + system.CoreTiming().Advance(); + current_thread->SetPhantomMode(false); + } + current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES); + system.CoreTiming().ResetTicks(); + scheduler.Unload(); + auto& next_scheduler = system.Kernel().Scheduler(current_core); + Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext()); + /// May have changed scheduler + auto& current_scheduler = system.Kernel().Scheduler(current_core); + current_scheduler.Reload(); + auto* currrent_thread2 = current_scheduler.GetCurrentThread(); + if (!currrent_thread2->IsIdleThread()) { + idle_count = 0; + } +} + +void CpuManager::SingleCorePause(bool paused) { + if (!paused) { + bool all_not_barrier = false; + while (!all_not_barrier) { + all_not_barrier = !core_data[0].is_running.load() && core_data[0].initialized.load(); + } + core_data[0].enter_barrier->Set(); + if (paused_state.load()) { + bool all_barrier = false; + while (!all_barrier) { + all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load(); } - keep_running |= core_timing.CanCurrentContextRun(); + core_data[0].exit_barrier->Set(); } - } while (keep_running); + } else { + /// Wait until all cores are paused. + bool all_barrier = false; + while (!all_barrier) { + all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load(); + } + /// Don't release the barrier + } + paused_state = paused; +} + +void CpuManager::Pause(bool paused) { + if (is_multicore) { + MultiCorePause(paused); + } else { + SingleCorePause(paused); + } +} - if (GDBStub::IsServerEnabled()) { - GDBStub::SetCpuStepFlag(false); +void CpuManager::RunThread(std::size_t core) { + /// Initialization + system.RegisterCoreThread(core); + std::string name; + if (is_multicore) { + name = "yuzu:CoreCPUThread_" + std::to_string(core); + } else { + name = "yuzu:CPUThread"; + } + MicroProfileOnThreadCreate(name.c_str()); + Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + auto& data = core_data[core]; + data.enter_barrier = std::make_unique<Common::Event>(); + data.exit_barrier = std::make_unique<Common::Event>(); + data.host_context = Common::Fiber::ThreadToFiber(); + data.is_running = false; + data.initialized = true; + const bool sc_sync = !is_async_gpu && !is_multicore; + bool sc_sync_first_use = sc_sync; + /// Running + while (running_mode) { + data.is_running = false; + data.enter_barrier->Wait(); + if (sc_sync_first_use) { + system.GPU().ObtainContext(); + sc_sync_first_use = false; + } + auto& scheduler = system.Kernel().CurrentScheduler(); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + data.is_running = true; + Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext()); + data.is_running = false; + data.is_paused = true; + data.exit_barrier->Wait(); + data.is_paused = false; } + /// Time to cleanup + data.host_context->Exit(); + data.enter_barrier.reset(); + data.exit_barrier.reset(); + data.initialized = false; } } // namespace Core diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index 97554d1bb..35929ed94 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -5,12 +5,19 @@ #pragma once #include <array> +#include <atomic> +#include <functional> #include <memory> +#include <thread> #include "core/hardware_properties.h" +namespace Common { +class Event; +class Fiber; +} // namespace Common + namespace Core { -class CoreManager; class System; class CpuManager { @@ -24,24 +31,75 @@ public: CpuManager& operator=(const CpuManager&) = delete; CpuManager& operator=(CpuManager&&) = delete; + /// Sets if emulation is multicore or single core, must be set before Initialize + void SetMulticore(bool is_multicore) { + this->is_multicore = is_multicore; + } + + /// Sets if emulation is using an asynchronous GPU. + void SetAsyncGpu(bool is_async_gpu) { + this->is_async_gpu = is_async_gpu; + } + void Initialize(); void Shutdown(); - CoreManager& GetCoreManager(std::size_t index); - const CoreManager& GetCoreManager(std::size_t index) const; + void Pause(bool paused); - CoreManager& GetCurrentCoreManager(); - const CoreManager& GetCurrentCoreManager() const; + std::function<void(void*)> GetGuestThreadStartFunc(); + std::function<void(void*)> GetIdleThreadStartFunc(); + std::function<void(void*)> GetSuspendThreadStartFunc(); + void* GetStartFuncParamater(); - std::size_t GetActiveCoreIndex() const { - return active_core; - } + void PreemptSingleCore(bool from_running_enviroment = true); - void RunLoop(bool tight_loop); + std::size_t CurrentCore() const { + return current_core.load(); + } private: - std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers; - std::size_t active_core{}; ///< Active core, only used in single thread mode + static void GuestThreadFunction(void* cpu_manager); + static void GuestRewindFunction(void* cpu_manager); + static void IdleThreadFunction(void* cpu_manager); + static void SuspendThreadFunction(void* cpu_manager); + + void MultiCoreRunGuestThread(); + void MultiCoreRunGuestLoop(); + void MultiCoreRunIdleThread(); + void MultiCoreRunSuspendThread(); + void MultiCorePause(bool paused); + + void SingleCoreRunGuestThread(); + void SingleCoreRunGuestLoop(); + void SingleCoreRunIdleThread(); + void SingleCoreRunSuspendThread(); + void SingleCorePause(bool paused); + + static void ThreadStart(CpuManager& cpu_manager, std::size_t core); + + void RunThread(std::size_t core); + + struct CoreData { + std::shared_ptr<Common::Fiber> host_context; + std::unique_ptr<Common::Event> enter_barrier; + std::unique_ptr<Common::Event> exit_barrier; + std::atomic<bool> is_running; + std::atomic<bool> is_paused; + std::atomic<bool> initialized; + std::unique_ptr<std::thread> host_thread; + }; + + std::atomic<bool> running_mode{}; + std::atomic<bool> paused_state{}; + + std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{}; + + bool is_async_gpu{}; + bool is_multicore{}; + std::atomic<std::size_t> current_core{}; + std::size_t preemption_count{}; + std::size_t idle_count{}; + static constexpr std::size_t max_cycle_runs = 5; System& system; }; diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp index 8997c7082..f87fe0abc 100644 --- a/src/core/crypto/key_manager.cpp +++ b/src/core/crypto/key_manager.cpp @@ -695,8 +695,9 @@ void KeyManager::WriteKeyToFile(KeyCategory category, std::string_view keyname, } void KeyManager::SetKey(S128KeyType id, Key128 key, u64 field1, u64 field2) { - if (s128_keys.find({id, field1, field2}) != s128_keys.end()) + if (s128_keys.find({id, field1, field2}) != s128_keys.end() || key == Key128{}) { return; + } if (id == S128KeyType::Titlekey) { Key128 rights_id; std::memcpy(rights_id.data(), &field2, sizeof(u64)); @@ -716,8 +717,9 @@ void KeyManager::SetKey(S128KeyType id, Key128 key, u64 field1, u64 field2) { return std::tie(elem.second.type, elem.second.field1, elem.second.field2) == std::tie(id, field1, field2); }); - if (iter2 != s128_file_id.end()) + if (iter2 != s128_file_id.end()) { WriteKeyToFile(category, iter2->first, key); + } // Variable cases if (id == S128KeyType::KeyArea) { @@ -745,16 +747,18 @@ void KeyManager::SetKey(S128KeyType id, Key128 key, u64 field1, u64 field2) { } void KeyManager::SetKey(S256KeyType id, Key256 key, u64 field1, u64 field2) { - if (s256_keys.find({id, field1, field2}) != s256_keys.end()) + if (s256_keys.find({id, field1, field2}) != s256_keys.end() || key == Key256{}) { return; + } const auto iter = std::find_if( s256_file_id.begin(), s256_file_id.end(), [&id, &field1, &field2](const std::pair<std::string, KeyIndex<S256KeyType>> elem) { return std::tie(elem.second.type, elem.second.field1, elem.second.field2) == std::tie(id, field1, field2); }); - if (iter != s256_file_id.end()) + if (iter != s256_file_id.end()) { WriteKeyToFile(KeyCategory::Standard, iter->first, key); + } s256_keys[{id, field1, field2}] = key; } diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h index 7265c4171..9269a73f2 100644 --- a/src/core/crypto/key_manager.h +++ b/src/core/crypto/key_manager.h @@ -223,7 +223,16 @@ bool operator<(const KeyIndex<KeyType>& lhs, const KeyIndex<KeyType>& rhs) { class KeyManager { public: - KeyManager(); + static KeyManager& Instance() { + static KeyManager instance; + return instance; + } + + KeyManager(const KeyManager&) = delete; + KeyManager& operator=(const KeyManager&) = delete; + + KeyManager(KeyManager&&) = delete; + KeyManager& operator=(KeyManager&&) = delete; bool HasKey(S128KeyType id, u64 field1 = 0, u64 field2 = 0) const; bool HasKey(S256KeyType id, u64 field1 = 0, u64 field2 = 0) const; @@ -257,6 +266,8 @@ public: bool AddTicketPersonalized(Ticket raw); private: + KeyManager(); + std::map<KeyIndex<S128KeyType>, Key128> s128_keys; std::map<KeyIndex<S256KeyType>, Key256> s256_keys; diff --git a/src/core/file_sys/bis_factory.cpp b/src/core/file_sys/bis_factory.cpp index 0af44f340..8935a62c3 100644 --- a/src/core/file_sys/bis_factory.cpp +++ b/src/core/file_sys/bis_factory.cpp @@ -79,7 +79,7 @@ VirtualDir BISFactory::OpenPartition(BisPartitionId id) const { } VirtualFile BISFactory::OpenPartitionStorage(BisPartitionId id) const { - Core::Crypto::KeyManager keys; + auto& keys = Core::Crypto::KeyManager::Instance(); Core::Crypto::PartitionDataManager pdm{ Core::System::GetInstance().GetFilesystem()->OpenDirectory( FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir), Mode::Read)}; diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index 07d0c8d5d..664a47e7f 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp @@ -178,7 +178,7 @@ u32 XCI::GetSystemUpdateVersion() { return 0; for (const auto& file : update->GetFiles()) { - NCA nca{file, nullptr, 0, keys}; + NCA nca{file, nullptr, 0}; if (nca.GetStatus() != Loader::ResultStatus::Success) continue; @@ -286,7 +286,7 @@ Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) { continue; } - auto nca = std::make_shared<NCA>(file, nullptr, 0, keys); + auto nca = std::make_shared<NCA>(file, nullptr, 0); if (nca->IsUpdate()) { continue; } diff --git a/src/core/file_sys/card_image.h b/src/core/file_sys/card_image.h index c2ee0ea99..e1b136426 100644 --- a/src/core/file_sys/card_image.h +++ b/src/core/file_sys/card_image.h @@ -140,6 +140,6 @@ private: u64 update_normal_partition_end; - Core::Crypto::KeyManager keys; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); }; } // namespace FileSys diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp index b8bbdd1ef..473245d5a 100644 --- a/src/core/file_sys/content_archive.cpp +++ b/src/core/file_sys/content_archive.cpp @@ -118,9 +118,8 @@ static bool IsValidNCA(const NCAHeader& header) { return header.magic == Common::MakeMagic('N', 'C', 'A', '3'); } -NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset, - Core::Crypto::KeyManager keys_) - : file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)), keys(std::move(keys_)) { +NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset) + : file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)) { if (file == nullptr) { status = Loader::ResultStatus::ErrorNullFile; return; diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h index e249079b5..d25cbcf91 100644 --- a/src/core/file_sys/content_archive.h +++ b/src/core/file_sys/content_archive.h @@ -99,8 +99,7 @@ inline bool IsDirectoryLogoPartition(const VirtualDir& pfs) { class NCA : public ReadOnlyVfsDirectory { public: explicit NCA(VirtualFile file, VirtualFile bktr_base_romfs = nullptr, - u64 bktr_base_ivfc_offset = 0, - Core::Crypto::KeyManager keys = Core::Crypto::KeyManager()); + u64 bktr_base_ivfc_offset = 0); ~NCA() override; Loader::ResultStatus GetStatus() const; @@ -159,7 +158,7 @@ private: bool encrypted = false; bool is_update = false; - Core::Crypto::KeyManager keys; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); }; } // namespace FileSys diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index ba5f76288..27c1b0233 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -408,7 +408,7 @@ void RegisteredCache::ProcessFiles(const std::vector<NcaID>& ids) { if (file == nullptr) continue; - const auto nca = std::make_shared<NCA>(parser(file, id), nullptr, 0, keys); + const auto nca = std::make_shared<NCA>(parser(file, id), nullptr, 0); if (nca->GetStatus() != Loader::ResultStatus::Success || nca->GetType() != NCAContentType::Meta) { continue; @@ -486,7 +486,7 @@ std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType t const auto raw = GetEntryRaw(title_id, type); if (raw == nullptr) return nullptr; - return std::make_unique<NCA>(raw, nullptr, 0, keys); + return std::make_unique<NCA>(raw, nullptr, 0); } template <typename T> @@ -865,7 +865,7 @@ std::unique_ptr<NCA> ManualContentProvider::GetEntry(u64 title_id, ContentRecord const auto res = GetEntryRaw(title_id, type); if (res == nullptr) return nullptr; - return std::make_unique<NCA>(res, nullptr, 0, keys); + return std::make_unique<NCA>(res, nullptr, 0); } std::vector<ContentProviderEntry> ManualContentProvider::ListEntriesFilter( diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h index d1eec240e..f339cd17b 100644 --- a/src/core/file_sys/registered_cache.h +++ b/src/core/file_sys/registered_cache.h @@ -88,7 +88,7 @@ public: protected: // A single instance of KeyManager to be used by GetEntry() - Core::Crypto::KeyManager keys; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); }; class PlaceholderCache { diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp index ef3084681..175a8266a 100644 --- a/src/core/file_sys/submission_package.cpp +++ b/src/core/file_sys/submission_package.cpp @@ -21,7 +21,7 @@ namespace FileSys { namespace { void SetTicketKeys(const std::vector<VirtualFile>& files) { - Core::Crypto::KeyManager keys; + auto& keys = Core::Crypto::KeyManager::Instance(); for (const auto& ticket_file : files) { if (ticket_file == nullptr) { @@ -285,7 +285,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) { continue; } - auto next_nca = std::make_shared<NCA>(std::move(next_file), nullptr, 0, keys); + auto next_nca = std::make_shared<NCA>(std::move(next_file), nullptr, 0); if (next_nca->GetType() == NCAContentType::Program) { program_status[cnmt.GetTitleID()] = next_nca->GetStatus(); } diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h index ee9b6ce17..cf89de6a9 100644 --- a/src/core/file_sys/submission_package.h +++ b/src/core/file_sys/submission_package.h @@ -73,7 +73,7 @@ private: std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> ncas; std::vector<VirtualFile> ticket_files; - Core::Crypto::KeyManager keys; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); VirtualFile romfs; VirtualDir exefs; diff --git a/src/core/file_sys/system_archive/mii_model.cpp b/src/core/file_sys/system_archive/mii_model.cpp index 6a9add87c..61bb67945 100644 --- a/src/core/file_sys/system_archive/mii_model.cpp +++ b/src/core/file_sys/system_archive/mii_model.cpp @@ -40,7 +40,7 @@ VirtualDir MiiModel() { out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>( MiiModelData::SHAPE_MID, "ShapeMid.dat")); - return std::move(out); + return out; } } // namespace FileSys::SystemArchive diff --git a/src/core/file_sys/system_archive/shared_font.cpp b/src/core/file_sys/system_archive/shared_font.cpp index 2c05eb42e..c5cdf7d9b 100644 --- a/src/core/file_sys/system_archive/shared_font.cpp +++ b/src/core/file_sys/system_archive/shared_font.cpp @@ -23,7 +23,7 @@ VirtualFile PackBFTTF(const std::array<u8, Size>& data, const std::string& name) std::vector<u8> bfttf(Size + sizeof(u64)); - u64 offset = 0; + size_t offset = 0; Service::NS::EncryptSharedFont(vec, bfttf, offset); return std::make_shared<VectorVfsFile>(std::move(bfttf), name); } diff --git a/src/core/file_sys/xts_archive.h b/src/core/file_sys/xts_archive.h index 7704dee90..563531bb6 100644 --- a/src/core/file_sys/xts_archive.h +++ b/src/core/file_sys/xts_archive.h @@ -62,6 +62,6 @@ private: VirtualFile dec_file; - Core::Crypto::KeyManager keys; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); }; } // namespace FileSys diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index d0c43447c..c1fbc235b 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -29,7 +29,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { const float window_aspect_ratio = static_cast<float>(height) / width; const float emulation_aspect_ratio = EmulationAspectRatio( - static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio); + static_cast<AspectRatio>(Settings::values.aspect_ratio.GetValue()), window_aspect_ratio); const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 70c0f8b80..79f22a403 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -35,7 +35,6 @@ #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h index b04e046ed..456b41e1b 100644 --- a/src/core/hardware_properties.h +++ b/src/core/hardware_properties.h @@ -42,6 +42,10 @@ struct EmuThreadHandle { constexpr u32 invalid_handle = 0xFFFFFFFF; return {invalid_handle, invalid_handle}; } + + bool IsInvalid() const { + return (*this) == InvalidHandle(); + } }; } // namespace Core diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 8475b698c..4d2a9b35d 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -7,11 +7,15 @@ #include "common/assert.h" #include "common/common_types.h" +#include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/errors.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" #include "core/hle/result.h" #include "core/memory.h" @@ -20,6 +24,7 @@ namespace Kernel { // Wake up num_to_wake (or all) threads in a vector. void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) { + auto& time_manager = system.Kernel().TimeManager(); // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. std::size_t last = waiting_threads.size(); @@ -29,12 +34,10 @@ void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& wai // Signal the waiting threads. for (std::size_t i = 0; i < last; i++) { - ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb); - waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); + waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS); RemoveThread(waiting_threads[i]); - waiting_threads[i]->SetArbiterWaitAddress(0); + waiting_threads[i]->WaitForArbitration(false); waiting_threads[i]->ResumeFromWait(); - system.PrepareReschedule(waiting_threads[i]->GetProcessorID()); } } @@ -56,6 +59,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v } ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { + SchedulerLock lock(system.Kernel()); const std::vector<std::shared_ptr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); WakeThreads(waiting_threads, num_to_wake); @@ -64,6 +68,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { + SchedulerLock lock(system.Kernel()); auto& memory = system.Memory(); // Ensure that we can write to the address. @@ -71,16 +76,24 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 return ERR_INVALID_ADDRESS_STATE; } - if (static_cast<s32>(memory.Read32(address)) != value) { - return ERR_INVALID_STATE; - } + const std::size_t current_core = system.CurrentCoreIndex(); + auto& monitor = system.Monitor(); + u32 current_value; + do { + current_value = monitor.ExclusiveRead32(current_core, address); + + if (current_value != value) { + return ERR_INVALID_STATE; + } + current_value++; + } while (!monitor.ExclusiveWrite32(current_core, address, current_value)); - memory.Write32(address, static_cast<u32>(value + 1)); return SignalToAddressOnly(address, num_to_wake); } ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { + SchedulerLock lock(system.Kernel()); auto& memory = system.Memory(); // Ensure that we can write to the address. @@ -92,29 +105,33 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a const std::vector<std::shared_ptr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); - // Determine the modified value depending on the waiting count. + const std::size_t current_core = system.CurrentCoreIndex(); + auto& monitor = system.Monitor(); s32 updated_value; - if (num_to_wake <= 0) { - if (waiting_threads.empty()) { - updated_value = value + 1; - } else { - updated_value = value - 1; + do { + updated_value = monitor.ExclusiveRead32(current_core, address); + + if (updated_value != value) { + return ERR_INVALID_STATE; } - } else { - if (waiting_threads.empty()) { - updated_value = value + 1; - } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) { - updated_value = value - 1; + // Determine the modified value depending on the waiting count. + if (num_to_wake <= 0) { + if (waiting_threads.empty()) { + updated_value = value + 1; + } else { + updated_value = value - 1; + } } else { - updated_value = value; + if (waiting_threads.empty()) { + updated_value = value + 1; + } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) { + updated_value = value - 1; + } else { + updated_value = value; + } } - } + } while (!monitor.ExclusiveWrite32(current_core, address, updated_value)); - if (static_cast<s32>(memory.Read32(address)) != value) { - return ERR_INVALID_STATE; - } - - memory.Write32(address, static_cast<u32>(updated_value)); WakeThreads(waiting_threads, num_to_wake); return RESULT_SUCCESS; } @@ -136,60 +153,127 @@ ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { auto& memory = system.Memory(); + auto& kernel = system.Kernel(); + Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); - // Ensure that we can read the address. - if (!memory.IsValidVirtualAddress(address)) { - return ERR_INVALID_ADDRESS_STATE; - } + Handle event_handle = InvalidHandle; + { + SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); + + if (current_thread->IsPendingTermination()) { + lock.CancelSleep(); + return ERR_THREAD_TERMINATING; + } + + // Ensure that we can read the address. + if (!memory.IsValidVirtualAddress(address)) { + lock.CancelSleep(); + return ERR_INVALID_ADDRESS_STATE; + } + + s32 current_value = static_cast<s32>(memory.Read32(address)); + if (current_value >= value) { + lock.CancelSleep(); + return ERR_INVALID_STATE; + } + + current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + + s32 decrement_value; + + const std::size_t current_core = system.CurrentCoreIndex(); + auto& monitor = system.Monitor(); + do { + current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address)); + if (should_decrement) { + decrement_value = current_value - 1; + } else { + decrement_value = current_value; + } + } while ( + !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value))); + + // Short-circuit without rescheduling, if timeout is zero. + if (timeout == 0) { + lock.CancelSleep(); + return RESULT_TIMEOUT; + } - const s32 cur_value = static_cast<s32>(memory.Read32(address)); - if (cur_value >= value) { - return ERR_INVALID_STATE; + current_thread->SetArbiterWaitAddress(address); + InsertThread(SharedFrom(current_thread)); + current_thread->SetStatus(ThreadStatus::WaitArb); + current_thread->WaitForArbitration(true); } - if (should_decrement) { - memory.Write32(address, static_cast<u32>(cur_value - 1)); + if (event_handle != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); } - // Short-circuit without rescheduling, if timeout is zero. - if (timeout == 0) { - return RESULT_TIMEOUT; + { + SchedulerLock lock(kernel); + if (current_thread->IsWaitingForArbitration()) { + RemoveThread(SharedFrom(current_thread)); + current_thread->WaitForArbitration(false); + } } - return WaitForAddressImpl(address, timeout); + return current_thread->GetSignalingResult(); } ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { auto& memory = system.Memory(); + auto& kernel = system.Kernel(); + Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); - // Ensure that we can read the address. - if (!memory.IsValidVirtualAddress(address)) { - return ERR_INVALID_ADDRESS_STATE; - } + Handle event_handle = InvalidHandle; + { + SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); + + if (current_thread->IsPendingTermination()) { + lock.CancelSleep(); + return ERR_THREAD_TERMINATING; + } + + // Ensure that we can read the address. + if (!memory.IsValidVirtualAddress(address)) { + lock.CancelSleep(); + return ERR_INVALID_ADDRESS_STATE; + } - // Only wait for the address if equal. - if (static_cast<s32>(memory.Read32(address)) != value) { - return ERR_INVALID_STATE; + s32 current_value = static_cast<s32>(memory.Read32(address)); + if (current_value != value) { + lock.CancelSleep(); + return ERR_INVALID_STATE; + } + + // Short-circuit without rescheduling, if timeout is zero. + if (timeout == 0) { + lock.CancelSleep(); + return RESULT_TIMEOUT; + } + + current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + current_thread->SetArbiterWaitAddress(address); + InsertThread(SharedFrom(current_thread)); + current_thread->SetStatus(ThreadStatus::WaitArb); + current_thread->WaitForArbitration(true); } - // Short-circuit without rescheduling if timeout is zero. - if (timeout == 0) { - return RESULT_TIMEOUT; + if (event_handle != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); } - return WaitForAddressImpl(address, timeout); -} + { + SchedulerLock lock(kernel); + if (current_thread->IsWaitingForArbitration()) { + RemoveThread(SharedFrom(current_thread)); + current_thread->WaitForArbitration(false); + } + } -ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { - Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); - current_thread->SetArbiterWaitAddress(address); - InsertThread(SharedFrom(current_thread)); - current_thread->SetStatus(ThreadStatus::WaitArb); - current_thread->InvalidateWakeupCallback(); - current_thread->WakeAfterDelay(timeout); - - system.PrepareReschedule(current_thread->GetProcessorID()); - return RESULT_TIMEOUT; + return current_thread->GetSignalingResult(); } void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { @@ -221,9 +305,9 @@ void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { return thread == entry; }); - ASSERT(iter != thread_list.cend()); - - thread_list.erase(iter); + if (iter != thread_list.cend()) { + thread_list.erase(iter); + } } std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index f958eee5a..0b05d533c 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -73,9 +73,6 @@ private: /// Waits on an address if the value passed is equal to the argument value. ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); - // Waits on the given address with a timeout in nanoseconds - ResultCode WaitForAddressImpl(VAddr address, s64 timeout); - /// Wake up num_to_wake (or all) threads in a vector. void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake); diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp index 5498fd313..8aff2227a 100644 --- a/src/core/hle/kernel/client_port.cpp +++ b/src/core/hle/kernel/client_port.cpp @@ -34,7 +34,7 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() { } // Wake the threads waiting on the ServerPort - server_port->WakeupAllWaitingThreads(); + server_port->Signal(); return MakeResult(std::move(client)); } diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index 29bfa3621..d4e5d88cf 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -12,6 +12,7 @@ namespace Kernel { constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; +constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59}; constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103}; diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index ba0eac4c2..9277b5d08 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -14,14 +14,17 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" #include "core/hle/kernel/writable_event.h" #include "core/memory.h" @@ -46,15 +49,6 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( const std::string& reason, u64 timeout, WakeupCallback&& callback, std::shared_ptr<WritableEvent> writable_event) { // Put the client thread to sleep until the wait event is signaled or the timeout expires. - thread->SetWakeupCallback( - [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<SynchronizationObject> object, - std::size_t index) mutable -> bool { - ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent); - callback(thread, context, reason); - context.WriteToOutgoingCommandBuffer(*thread); - return true; - }); if (!writable_event) { // Create event if not provided @@ -62,14 +56,26 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( writable_event = pair.writable; } - const auto readable_event{writable_event->GetReadableEvent()}; - writable_event->Clear(); - thread->SetStatus(ThreadStatus::WaitHLEEvent); - thread->SetSynchronizationObjects({readable_event}); - readable_event->AddWaitingThread(thread); - - if (timeout > 0) { - thread->WakeAfterDelay(timeout); + { + Handle event_handle = InvalidHandle; + SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout); + thread->SetHLECallback( + [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool { + ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT + ? ThreadWakeupReason::Timeout + : ThreadWakeupReason::Signal; + callback(thread, context, reason); + context.WriteToOutgoingCommandBuffer(*thread); + return true; + }); + const auto readable_event{writable_event->GetReadableEvent()}; + writable_event->Clear(); + thread->SetHLESyncObject(readable_event.get()); + thread->SetStatus(ThreadStatus::WaitHLEEvent); + thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + readable_event->AddWaitingThread(thread); + lock.Release(); + thread->SetHLETimeEvent(event_handle); } is_thread_waiting = true; @@ -282,18 +288,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) { } std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { - std::vector<u8> buffer; + std::vector<u8> buffer{}; const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; if (is_buffer_a) { - ASSERT_MSG(BufferDescriptorA().size() > buffer_index, - "BufferDescriptorA invalid buffer_index {}", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return buffer; }, + "BufferDescriptorA invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorA()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size()); } else { - ASSERT_MSG(BufferDescriptorX().size() > buffer_index, - "BufferDescriptorX invalid buffer_index {}", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return buffer; }, + "BufferDescriptorX invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorX()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size()); } @@ -318,16 +324,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, } if (is_buffer_b) { - ASSERT_MSG(BufferDescriptorB().size() > buffer_index, - "BufferDescriptorB invalid buffer_index {}", buffer_index); - ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size, - "BufferDescriptorB buffer_index {} is not large enough", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index && + BufferDescriptorB()[buffer_index].Size() >= size, + { return 0; }, "BufferDescriptorB is invalid, index={}, size={}", + buffer_index, size); memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size); } else { - ASSERT_MSG(BufferDescriptorC().size() > buffer_index, - "BufferDescriptorC invalid buffer_index {}", buffer_index); - ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, - "BufferDescriptorC buffer_index {} is not large enough", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index && + BufferDescriptorC()[buffer_index].Size() >= size, + { return 0; }, "BufferDescriptorC is invalid, index={}, size={}", + buffer_index, size); memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size); } @@ -338,16 +344,12 @@ std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; if (is_buffer_a) { - ASSERT_MSG(BufferDescriptorA().size() > buffer_index, - "BufferDescriptorA invalid buffer_index {}", buffer_index); - ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0, - "BufferDescriptorA buffer_index {} is empty", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return 0; }, + "BufferDescriptorA invalid buffer_index {}", buffer_index); return BufferDescriptorA()[buffer_index].Size(); } else { - ASSERT_MSG(BufferDescriptorX().size() > buffer_index, - "BufferDescriptorX invalid buffer_index {}", buffer_index); - ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0, - "BufferDescriptorX buffer_index {} is empty", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return 0; }, + "BufferDescriptorX invalid buffer_index {}", buffer_index); return BufferDescriptorX()[buffer_index].Size(); } } @@ -356,14 +358,15 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && BufferDescriptorB()[buffer_index].Size()}; if (is_buffer_b) { - ASSERT_MSG(BufferDescriptorB().size() > buffer_index, - "BufferDescriptorB invalid buffer_index {}", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index, { return 0; }, + "BufferDescriptorB invalid buffer_index {}", buffer_index); return BufferDescriptorB()[buffer_index].Size(); } else { - ASSERT_MSG(BufferDescriptorC().size() > buffer_index, - "BufferDescriptorC invalid buffer_index {}", buffer_index); + ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index, { return 0; }, + "BufferDescriptorC invalid buffer_index {}", buffer_index); return BufferDescriptorC()[buffer_index].Size(); } + return 0; } std::string HLERequestContext::Description() const { diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 7655382fa..1f2af7a1b 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <atomic> #include <bitset> #include <functional> @@ -13,11 +14,15 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/thread.h" #include "core/arm/arm_interface.h" +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/cpu_manager.h" #include "core/device_memory.h" #include "core/hardware_properties.h" #include "core/hle/kernel/client_port.h" @@ -39,85 +44,28 @@ #include "core/hle/result.h" #include "core/memory.h" -namespace Kernel { - -/** - * Callback that will wake up the thread it was scheduled for - * @param thread_handle The handle of the thread that's been awoken - * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time - */ -static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) { - const auto proper_handle = static_cast<Handle>(thread_handle); - const auto& system = Core::System::GetInstance(); - - // Lock the global kernel mutex when we enter the kernel HLE. - std::lock_guard lock{HLE::g_hle_lock}; - - std::shared_ptr<Thread> thread = - system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); - if (thread == nullptr) { - LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); - return; - } - - bool resume = true; - - if (thread->GetStatus() == ThreadStatus::WaitSynch || - thread->GetStatus() == ThreadStatus::WaitHLEEvent) { - // Remove the thread from each of its waiting objects' waitlists - for (const auto& object : thread->GetSynchronizationObjects()) { - object->RemoveWaitingThread(thread); - } - thread->ClearSynchronizationObjects(); - - // Invoke the wakeup callback before clearing the wait objects - if (thread->HasWakeupCallback()) { - resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0); - } - } else if (thread->GetStatus() == ThreadStatus::WaitMutex || - thread->GetStatus() == ThreadStatus::WaitCondVar) { - thread->SetMutexWaitAddress(0); - thread->SetWaitHandle(0); - if (thread->GetStatus() == ThreadStatus::WaitCondVar) { - thread->GetOwnerProcess()->RemoveConditionVariableThread(thread); - thread->SetCondVarWaitAddress(0); - } - - auto* const lock_owner = thread->GetLockOwner(); - // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance - // and don't have a lock owner unless SignalProcessWideKey was called first and the thread - // wasn't awakened due to the mutex already being acquired. - if (lock_owner != nullptr) { - lock_owner->RemoveMutexWaiter(thread); - } - } +MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); - if (thread->GetStatus() == ThreadStatus::WaitArb) { - auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter(); - address_arbiter.HandleWakeupThread(thread); - } - - if (resume) { - if (thread->GetStatus() == ThreadStatus::WaitCondVar || - thread->GetStatus() == ThreadStatus::WaitArb) { - thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); - } - thread->ResumeFromWait(); - } -} +namespace Kernel { struct KernelCore::Impl { explicit Impl(Core::System& system, KernelCore& kernel) : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {} + void SetMulticore(bool is_multicore) { + this->is_multicore = is_multicore; + } + void Initialize(KernelCore& kernel) { Shutdown(); + RegisterHostThread(); InitializePhysicalCores(); InitializeSystemResourceLimit(kernel); InitializeMemoryLayout(); - InitializeThreads(); - InitializePreemption(); + InitializePreemption(kernel); + InitializeSchedulers(); + InitializeSuspendThreads(); } void Shutdown() { @@ -126,13 +74,26 @@ struct KernelCore::Impl { next_user_process_id = Process::ProcessIDMin; next_thread_id = 1; + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + if (suspend_threads[i]) { + suspend_threads[i].reset(); + } + } + + for (std::size_t i = 0; i < cores.size(); i++) { + cores[i].Shutdown(); + schedulers[i].reset(); + } + cores.clear(); + + registered_core_threads.reset(); + process_list.clear(); current_process = nullptr; system_resource_limit = nullptr; global_handle_table.Clear(); - thread_wakeup_event_type = nullptr; preemption_event = nullptr; global_scheduler.Shutdown(); @@ -145,13 +106,21 @@ struct KernelCore::Impl { cores.clear(); exclusive_monitor.reset(); + host_thread_ids.clear(); } void InitializePhysicalCores() { exclusive_monitor = Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES); for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - cores.emplace_back(system, i, *exclusive_monitor); + schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i); + cores.emplace_back(system, i, *schedulers[i], interrupts[i]); + } + } + + void InitializeSchedulers() { + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + cores[i].Scheduler().Initialize(); } } @@ -173,15 +142,13 @@ struct KernelCore::Impl { } } - void InitializeThreads() { - thread_wakeup_event_type = - Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback); - } - - void InitializePreemption() { - preemption_event = - Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) { - global_scheduler.PreemptThreads(); + void InitializePreemption(KernelCore& kernel) { + preemption_event = Core::Timing::CreateEvent( + "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) { + { + SchedulerLock lock(kernel); + global_scheduler.PreemptThreads(); + } s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); system.CoreTiming().ScheduleEvent(time_interval, preemption_event); }); @@ -190,6 +157,20 @@ struct KernelCore::Impl { system.CoreTiming().ScheduleEvent(time_interval, preemption_event); } + void InitializeSuspendThreads() { + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + std::string name = "Suspend Thread Id:" + std::to_string(i); + std::function<void(void*)> init_func = + system.GetCpuManager().GetSuspendThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + ThreadType type = + static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_SUSPEND); + auto thread_res = Thread::Create(system, type, name, 0, 0, 0, static_cast<u32>(i), 0, + nullptr, std::move(init_func), init_func_parameter); + suspend_threads[i] = std::move(thread_res).Unwrap(); + } + } + void MakeCurrentProcess(Process* process) { current_process = process; @@ -197,15 +178,17 @@ struct KernelCore::Impl { return; } - for (auto& core : cores) { - core.SetIs64Bit(process->Is64BitProcess()); + u32 core_id = GetCurrentHostThreadID(); + if (core_id < Core::Hardware::NUM_CPU_CORES) { + system.Memory().SetCurrentPageTable(*process, core_id); } - - system.Memory().SetCurrentPageTable(*process); } void RegisterCoreThread(std::size_t core_id) { std::unique_lock lock{register_thread_mutex}; + if (!is_multicore) { + single_core_thread_id = std::this_thread::get_id(); + } const std::thread::id this_id = std::this_thread::get_id(); const auto it = host_thread_ids.find(this_id); ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); @@ -219,12 +202,19 @@ struct KernelCore::Impl { std::unique_lock lock{register_thread_mutex}; const std::thread::id this_id = std::this_thread::get_id(); const auto it = host_thread_ids.find(this_id); - ASSERT(it == host_thread_ids.end()); + if (it != host_thread_ids.end()) { + return; + } host_thread_ids[this_id] = registered_thread_ids++; } u32 GetCurrentHostThreadID() const { const std::thread::id this_id = std::this_thread::get_id(); + if (!is_multicore) { + if (single_core_thread_id == this_id) { + return static_cast<u32>(system.GetCpuManager().CurrentCore()); + } + } const auto it = host_thread_ids.find(this_id); if (it == host_thread_ids.end()) { return Core::INVALID_HOST_THREAD_ID; @@ -240,7 +230,7 @@ struct KernelCore::Impl { } const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler(); const Kernel::Thread* current = sched.GetCurrentThread(); - if (current != nullptr) { + if (current != nullptr && !current->IsPhantomMode()) { result.guest_handle = current->GetGlobalHandle(); } else { result.guest_handle = InvalidHandle; @@ -313,7 +303,6 @@ struct KernelCore::Impl { std::shared_ptr<ResourceLimit> system_resource_limit; - std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type; std::shared_ptr<Core::Timing::EventType> preemption_event; // This is the kernel's handle table or supervisor handle table which @@ -343,6 +332,15 @@ struct KernelCore::Impl { std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; std::shared_ptr<Kernel::SharedMemory> time_shared_mem; + std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; + std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; + std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; + + bool is_multicore{}; + std::thread::id single_core_thread_id{}; + + std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{}; + // System context Core::System& system; }; @@ -352,6 +350,10 @@ KernelCore::~KernelCore() { Shutdown(); } +void KernelCore::SetMulticore(bool is_multicore) { + impl->SetMulticore(is_multicore); +} + void KernelCore::Initialize() { impl->Initialize(*this); } @@ -397,11 +399,11 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { } Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { - return impl->cores[id].Scheduler(); + return *impl->schedulers[id]; } const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { - return impl->cores[id].Scheduler(); + return *impl->schedulers[id]; } Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { @@ -412,6 +414,39 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { return impl->cores[id]; } +Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() { + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; +} + +const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; +} + +Kernel::Scheduler& KernelCore::CurrentScheduler() { + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return *impl->schedulers[core_id]; +} + +const Kernel::Scheduler& KernelCore::CurrentScheduler() const { + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return *impl->schedulers[core_id]; +} + +std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() { + return impl->interrupts; +} + +const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() + const { + return impl->interrupts; +} + Kernel::Synchronization& KernelCore::Synchronization() { return impl->synchronization; } @@ -437,15 +472,17 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { } void KernelCore::InvalidateAllInstructionCaches() { - for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) { - PhysicalCore(i).ArmInterface().ClearInstructionCache(); + auto& threads = GlobalScheduler().GetThreadList(); + for (auto& thread : threads) { + if (!thread->IsHLEThread()) { + auto& arm_interface = thread->ArmInterface(); + arm_interface.ClearInstructionCache(); + } } } void KernelCore::PrepareReschedule(std::size_t id) { - if (id < impl->global_scheduler.CpuCoresCount()) { - impl->cores[id].Stop(); - } + // TODO: Reimplement, this } void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) { @@ -481,10 +518,6 @@ u64 KernelCore::CreateNewUserProcessID() { return impl->next_user_process_id++; } -const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallbackEventType() const { - return impl->thread_wakeup_event_type; -} - Kernel::HandleTable& KernelCore::GlobalHandleTable() { return impl->global_handle_table; } @@ -557,4 +590,34 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const { return *impl->time_shared_mem; } +void KernelCore::Suspend(bool in_suspention) { + const bool should_suspend = exception_exited || in_suspention; + { + SchedulerLock lock(*this); + ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep; + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + impl->suspend_threads[i]->SetStatus(status); + } + } +} + +bool KernelCore::IsMulticore() const { + return impl->is_multicore; +} + +void KernelCore::ExceptionalExit() { + exception_exited = true; + Suspend(true); +} + +void KernelCore::EnterSVCProfile() { + std::size_t core = impl->GetCurrentHostThreadID(); + impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); +} + +void KernelCore::ExitSVCProfile() { + std::size_t core = impl->GetCurrentHostThreadID(); + MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 83de1f542..49bd47e89 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -4,15 +4,17 @@ #pragma once +#include <array> #include <memory> #include <string> #include <unordered_map> #include <vector> +#include "core/hardware_properties.h" #include "core/hle/kernel/memory/memory_types.h" #include "core/hle/kernel/object.h" namespace Core { -struct EmuThreadHandle; +class CPUInterruptHandler; class ExclusiveMonitor; class System; } // namespace Core @@ -65,6 +67,9 @@ public: KernelCore(KernelCore&&) = delete; KernelCore& operator=(KernelCore&&) = delete; + /// Sets if emulation is multicore or single core, must be set before Initialize + void SetMulticore(bool is_multicore); + /// Resets the kernel to a clean slate for use. void Initialize(); @@ -110,6 +115,18 @@ public: /// Gets the an instance of the respective physical CPU core. const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; + /// Gets the sole instance of the Scheduler at the current running core. + Kernel::Scheduler& CurrentScheduler(); + + /// Gets the sole instance of the Scheduler at the current running core. + const Kernel::Scheduler& CurrentScheduler() const; + + /// Gets the an instance of the current physical CPU core. + Kernel::PhysicalCore& CurrentPhysicalCore(); + + /// Gets the an instance of the current physical CPU core. + const Kernel::PhysicalCore& CurrentPhysicalCore() const; + /// Gets the an instance of the Synchronization Interface. Kernel::Synchronization& Synchronization(); @@ -129,6 +146,10 @@ public: const Core::ExclusiveMonitor& GetExclusiveMonitor() const; + std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts(); + + const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts() const; + void InvalidateAllInstructionCaches(); /// Adds a port to the named port table @@ -191,6 +212,18 @@ public: /// Gets the shared memory object for Time services. const Kernel::SharedMemory& GetTimeSharedMem() const; + /// Suspend/unsuspend the OS. + void Suspend(bool in_suspention); + + /// Exceptional exit the OS. + void ExceptionalExit(); + + bool IsMulticore() const; + + void EnterSVCProfile(); + + void ExitSVCProfile(); + private: friend class Object; friend class Process; @@ -208,9 +241,6 @@ private: /// Creates a new thread ID, incrementing the internal thread ID counter. u64 CreateNewThreadID(); - /// Retrieves the event type used for thread wakeup callbacks. - const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const; - /// Provides a reference to the global handle table. Kernel::HandleTable& GlobalHandleTable(); @@ -219,6 +249,7 @@ private: struct Impl; std::unique_ptr<Impl> impl; + bool exception_exited{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp index 6b432e1b2..acf13585c 100644 --- a/src/core/hle/kernel/memory/memory_manager.cpp +++ b/src/core/hle/kernel/memory/memory_manager.cpp @@ -104,7 +104,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa // Ensure that we don't leave anything un-freed auto group_guard = detail::ScopeExit([&] { for (const auto& it : page_list.Nodes()) { - const auto min_num_pages{std::min( + const auto min_num_pages{std::min<size_t>( it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; chosen_manager.Free(it.GetAddress(), min_num_pages); } @@ -139,7 +139,6 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa } // Only succeed if we allocated as many pages as we wanted - ASSERT(num_pages >= 0); if (num_pages) { return ERR_OUT_OF_MEMORY; } @@ -165,7 +164,7 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages, // Free all of the pages for (const auto& it : page_list.Nodes()) { - const auto min_num_pages{std::min( + const auto min_num_pages{std::min<size_t>( it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; chosen_manager.Free(it.GetAddress(), min_num_pages); } diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 7869eb32b..8f6c944d1 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -34,8 +34,6 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr if (thread->GetMutexWaitAddress() != mutex_addr) continue; - ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); - ++num_waiters; if (highest_priority_thread == nullptr || thread->GetPriority() < highest_priority_thread->GetPriority()) { @@ -49,6 +47,7 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr /// Update the mutex owner field of all threads waiting on the mutex to point to the new owner. static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread, std::shared_ptr<Thread> new_owner) { + current_thread->RemoveMutexWaiter(new_owner); const auto threads = current_thread->GetMutexWaitingThreads(); for (const auto& thread : threads) { if (thread->GetMutexWaitAddress() != mutex_addr) @@ -72,85 +71,100 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle, return ERR_INVALID_ADDRESS; } - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto& kernel = system.Kernel(); std::shared_ptr<Thread> current_thread = - SharedFrom(system.CurrentScheduler().GetCurrentThread()); - std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); - std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle); + SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); + { + SchedulerLock lock(kernel); + // The mutex address must be 4-byte aligned + if ((address % sizeof(u32)) != 0) { + return ERR_INVALID_ADDRESS; + } - // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another - // thread. - ASSERT(requesting_thread == current_thread); + const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); + std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); + std::shared_ptr<Thread> requesting_thread = + handle_table.Get<Thread>(requesting_thread_handle); - const u32 addr_value = system.Memory().Read32(address); + // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of + // another thread. + ASSERT(requesting_thread == current_thread); - // If the mutex isn't being held, just return success. - if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { - return RESULT_SUCCESS; - } + current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS); - if (holding_thread == nullptr) { - LOG_ERROR(Kernel, "Holding thread does not exist! thread_handle={:08X}", - holding_thread_handle); - return ERR_INVALID_HANDLE; - } + const u32 addr_value = system.Memory().Read32(address); + + // If the mutex isn't being held, just return success. + if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { + return RESULT_SUCCESS; + } - // Wait until the mutex is released - current_thread->SetMutexWaitAddress(address); - current_thread->SetWaitHandle(requesting_thread_handle); + if (holding_thread == nullptr) { + return ERR_INVALID_HANDLE; + } - current_thread->SetStatus(ThreadStatus::WaitMutex); - current_thread->InvalidateWakeupCallback(); + // Wait until the mutex is released + current_thread->SetMutexWaitAddress(address); + current_thread->SetWaitHandle(requesting_thread_handle); - // Update the lock holder thread's priority to prevent priority inversion. - holding_thread->AddMutexWaiter(current_thread); + current_thread->SetStatus(ThreadStatus::WaitMutex); - system.PrepareReschedule(); + // Update the lock holder thread's priority to prevent priority inversion. + holding_thread->AddMutexWaiter(current_thread); + } - return RESULT_SUCCESS; + { + SchedulerLock lock(kernel); + auto* owner = current_thread->GetLockOwner(); + if (owner != nullptr) { + owner->RemoveMutexWaiter(current_thread); + } + } + return current_thread->GetSignalingResult(); } -ResultCode Mutex::Release(VAddr address) { +std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner, + VAddr address) { // The mutex address must be 4-byte aligned if ((address % sizeof(u32)) != 0) { LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); - return ERR_INVALID_ADDRESS; + return {ERR_INVALID_ADDRESS, nullptr}; } - std::shared_ptr<Thread> current_thread = - SharedFrom(system.CurrentScheduler().GetCurrentThread()); - auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address); - - // There are no more threads waiting for the mutex, release it completely. - if (thread == nullptr) { + auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address); + if (new_owner == nullptr) { system.Memory().Write32(address, 0); - return RESULT_SUCCESS; + return {RESULT_SUCCESS, nullptr}; } - // Transfer the ownership of the mutex from the previous owner to the new one. - TransferMutexOwnership(address, current_thread, thread); - - u32 mutex_value = thread->GetWaitHandle(); - + TransferMutexOwnership(address, owner, new_owner); + u32 mutex_value = new_owner->GetWaitHandle(); if (num_waiters >= 2) { // Notify the guest that there are still some threads waiting for the mutex mutex_value |= Mutex::MutexHasWaitersFlag; } + new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS); + new_owner->SetLockOwner(nullptr); + new_owner->ResumeFromWait(); - // Grant the mutex to the next waiting thread and resume it. system.Memory().Write32(address, mutex_value); + return {RESULT_SUCCESS, new_owner}; +} - ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); - thread->ResumeFromWait(); +ResultCode Mutex::Release(VAddr address) { + auto& kernel = system.Kernel(); + SchedulerLock lock(kernel); - thread->SetLockOwner(nullptr); - thread->SetCondVarWaitAddress(0); - thread->SetMutexWaitAddress(0); - thread->SetWaitHandle(0); - thread->SetWaitSynchronizationResult(RESULT_SUCCESS); + std::shared_ptr<Thread> current_thread = + SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); - system.PrepareReschedule(); + auto [result, new_owner] = Unlock(current_thread, address); - return RESULT_SUCCESS; + if (result != RESULT_SUCCESS && new_owner != nullptr) { + new_owner->SetSynchronizationResults(nullptr, result); + } + + return result; } + } // namespace Kernel diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h index b904de2e8..3b81dc3df 100644 --- a/src/core/hle/kernel/mutex.h +++ b/src/core/hle/kernel/mutex.h @@ -28,6 +28,10 @@ public: ResultCode TryAcquire(VAddr address, Handle holding_thread_handle, Handle requesting_thread_handle); + /// Unlocks a mutex for owner at address + std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner, + VAddr address); + /// Releases the mutex at the specified address. ResultCode Release(VAddr address); diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index a15011076..c6bbdb080 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -2,12 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/assert.h" #include "common/logging/log.h" +#include "common/spin_lock.h" #include "core/arm/arm_interface.h" #ifdef ARCHITECTURE_x86_64 #include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" #endif +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" @@ -17,50 +20,37 @@ namespace Kernel { -PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, - Core::ExclusiveMonitor& exclusive_monitor) - : core_index{id} { -#ifdef ARCHITECTURE_x86_64 - arm_interface_32 = - std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index); - arm_interface_64 = - std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index); - -#else - using Core::ARM_Unicorn; - arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32); - arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64); - LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); -#endif +PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler, + Core::CPUInterruptHandler& interrupt_handler) + : interrupt_handler{interrupt_handler}, core_index{id}, scheduler{scheduler} { - scheduler = std::make_unique<Kernel::Scheduler>(system, core_index); + guard = std::make_unique<Common::SpinLock>(); } PhysicalCore::~PhysicalCore() = default; -void PhysicalCore::Run() { - arm_interface->Run(); - arm_interface->ClearExclusiveState(); +void PhysicalCore::Idle() { + interrupt_handler.AwaitInterrupt(); } -void PhysicalCore::Step() { - arm_interface->Step(); +void PhysicalCore::Shutdown() { + scheduler.Shutdown(); } -void PhysicalCore::Stop() { - arm_interface->PrepareReschedule(); +bool PhysicalCore::IsInterrupted() const { + return interrupt_handler.IsInterrupted(); } -void PhysicalCore::Shutdown() { - scheduler->Shutdown(); +void PhysicalCore::Interrupt() { + guard->lock(); + interrupt_handler.SetInterrupt(true); + guard->unlock(); } -void PhysicalCore::SetIs64Bit(bool is_64_bit) { - if (is_64_bit) { - arm_interface = arm_interface_64.get(); - } else { - arm_interface = arm_interface_32.get(); - } +void PhysicalCore::ClearInterrupt() { + guard->lock(); + interrupt_handler.SetInterrupt(false); + guard->unlock(); } } // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 3269166be..d7a7a951c 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -7,12 +7,17 @@ #include <cstddef> #include <memory> +namespace Common { +class SpinLock; +} + namespace Kernel { class Scheduler; } // namespace Kernel namespace Core { class ARM_Interface; +class CPUInterruptHandler; class ExclusiveMonitor; class System; } // namespace Core @@ -21,7 +26,8 @@ namespace Kernel { class PhysicalCore { public: - PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor); + PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler, + Core::CPUInterruptHandler& interrupt_handler); ~PhysicalCore(); PhysicalCore(const PhysicalCore&) = delete; @@ -30,23 +36,18 @@ public: PhysicalCore(PhysicalCore&&) = default; PhysicalCore& operator=(PhysicalCore&&) = default; - /// Execute current jit state - void Run(); - /// Execute a single instruction in current jit. - void Step(); - /// Stop JIT execution/exit - void Stop(); + void Idle(); + /// Interrupt this physical core. + void Interrupt(); - // Shutdown this physical core. - void Shutdown(); + /// Clear this core's interrupt + void ClearInterrupt(); - Core::ARM_Interface& ArmInterface() { - return *arm_interface; - } + /// Check if this core is interrupted + bool IsInterrupted() const; - const Core::ARM_Interface& ArmInterface() const { - return *arm_interface; - } + // Shutdown this physical core. + void Shutdown(); bool IsMainCore() const { return core_index == 0; @@ -61,21 +62,18 @@ public: } Kernel::Scheduler& Scheduler() { - return *scheduler; + return scheduler; } const Kernel::Scheduler& Scheduler() const { - return *scheduler; + return scheduler; } - void SetIs64Bit(bool is_64_bit); - private: + Core::CPUInterruptHandler& interrupt_handler; std::size_t core_index; - std::unique_ptr<Core::ARM_Interface> arm_interface_32; - std::unique_ptr<Core::ARM_Interface> arm_interface_64; - std::unique_ptr<Kernel::Scheduler> scheduler; - Core::ARM_Interface* arm_interface{}; + Kernel::Scheduler& scheduler; + std::unique_ptr<Common::SpinLock> guard; }; } // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 36724569f..c6fcb56ad 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -22,6 +22,7 @@ #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" +#include "core/hle/lock.h" #include "core/memory.h" #include "core/settings.h" @@ -30,14 +31,15 @@ namespace { /** * Sets up the primary application thread * + * @param system The system instance to create the main thread under. * @param owner_process The parent process for the main thread - * @param kernel The kernel instance to create the main thread under. * @param priority The priority to give the main thread */ -void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) { +void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) { const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); - auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, - owner_process.GetIdealCore(), stack_top, owner_process); + ThreadType type = THREADTYPE_USER; + auto thread_res = Thread::Create(system, type, "main", entry_point, priority, 0, + owner_process.GetIdealCore(), stack_top, &owner_process); std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap(); @@ -48,8 +50,12 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, V thread->GetContext32().cpu_registers[1] = thread_handle; thread->GetContext64().cpu_registers[1] = thread_handle; + auto& kernel = system.Kernel(); // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires - thread->ResumeFromWait(); + { + SchedulerLock lock{kernel}; + thread->SetStatus(ThreadStatus::Ready); + } } } // Anonymous namespace @@ -117,7 +123,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name, : kernel.CreateNewUserProcessID(); process->capabilities.InitializeForMetadatalessProcess(); - std::mt19937 rng(Settings::values.rng_seed.value_or(0)); + std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(0)); std::uniform_int_distribution<u64> distribution; std::generate(process->random_entropy.begin(), process->random_entropy.end(), [&] { return distribution(rng); }); @@ -132,7 +138,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const { u64 Process::GetTotalPhysicalMemoryAvailable() const { const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + - page_table->GetTotalHeapSize() + image_size + main_thread_stack_size}; + page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size + + main_thread_stack_size}; if (capacity < memory_usage_capacity) { return capacity; @@ -146,7 +153,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { } u64 Process::GetTotalPhysicalMemoryUsed() const { - return image_size + main_thread_stack_size + page_table->GetTotalHeapSize(); + return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() + + GetSystemResourceSize(); } u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { @@ -180,7 +188,6 @@ void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) { } ++it; } - UNREACHABLE(); } std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads( @@ -205,6 +212,7 @@ void Process::UnregisterThread(const Thread* thread) { } ResultCode Process::ClearSignalState() { + SchedulerLock lock(system.Kernel()); if (status == ProcessStatus::Exited) { LOG_ERROR(Kernel, "called on a terminated process instance."); return ERR_INVALID_STATE; @@ -292,7 +300,7 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) { ChangeStatus(ProcessStatus::Running); - SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top); + SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top); resource_limit->Reserve(ResourceType::Threads, 1); resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size); } @@ -338,6 +346,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) { } VAddr Process::CreateTLSRegion() { + SchedulerLock lock(system.Kernel()); if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; tls_page_iter != tls_pages.cend()) { return *tls_page_iter->ReserveSlot(); @@ -368,6 +377,7 @@ VAddr Process::CreateTLSRegion() { } void Process::FreeTLSRegion(VAddr tls_address) { + SchedulerLock lock(system.Kernel()); const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); auto iter = std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { @@ -382,6 +392,7 @@ void Process::FreeTLSRegion(VAddr tls_address) { } void Process::LoadModule(CodeSet code_set, VAddr base_addr) { + std::lock_guard lock{HLE::g_hle_lock}; const auto ReprotectSegment = [&](const CodeSet::Segment& segment, Memory::MemoryPermission permission) { page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission); diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index 00860fcbd..6e286419e 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -6,8 +6,10 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/hle/kernel/errors.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/readable_event.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" namespace Kernel { @@ -37,8 +39,9 @@ void ReadableEvent::Clear() { } ResultCode ReadableEvent::Reset() { + SchedulerLock lock(kernel); if (!is_signaled) { - LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}", + LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}", GetObjectId(), GetTypeName(), GetName()); return ERR_INVALID_STATE; } diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index d9beaa3a4..212e442f4 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp @@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) { const std::size_t index{ResourceTypeToIndex(resource)}; s64 new_value = current[index] + amount; - while (new_value > limit[index] && available[index] + amount <= limit[index]) { + if (new_value > limit[index] && available[index] + amount <= limit[index]) { // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout new_value = current[index] + amount; - - if (timeout >= 0) { - break; - } } if (new_value <= limit[index]) { diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 1140c72a3..7b929781c 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -6,16 +6,21 @@ // licensed under GPLv2 or later under exception provided by the author. #include <algorithm> +#include <mutex> #include <set> #include <unordered_set> #include <utility> #include "common/assert.h" +#include "common/bit_util.h" +#include "common/fiber.h" #include "common/logging/log.h" #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/cpu_manager.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/time_manager.h" @@ -27,103 +32,148 @@ GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {} GlobalScheduler::~GlobalScheduler() = default; void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) { + std::scoped_lock lock{global_list_guard}; thread_list.push_back(std::move(thread)); } void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) { + std::scoped_lock lock{global_list_guard}; thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), thread_list.end()); } -void GlobalScheduler::UnloadThread(std::size_t core) { - Scheduler& sched = kernel.Scheduler(core); - sched.UnloadThread(); -} - -void GlobalScheduler::SelectThread(std::size_t core) { +u32 GlobalScheduler::SelectThreads() { + ASSERT(is_locked); const auto update_thread = [](Thread* thread, Scheduler& sched) { - if (thread != sched.selected_thread.get()) { + std::scoped_lock lock{sched.guard}; + if (thread != sched.selected_thread_set.get()) { if (thread == nullptr) { ++sched.idle_selection_count; } - sched.selected_thread = SharedFrom(thread); + sched.selected_thread_set = SharedFrom(thread); } - sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; + const bool reschedule_pending = + sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread); + sched.is_context_switch_pending = reschedule_pending; std::atomic_thread_fence(std::memory_order_seq_cst); + return reschedule_pending; }; - Scheduler& sched = kernel.Scheduler(core); - Thread* current_thread = nullptr; - // Step 1: Get top thread in schedule queue. - current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); - if (current_thread) { - update_thread(current_thread, sched); - return; + if (!is_reselection_pending.load()) { + return 0; } - // Step 2: Try selecting a suggested thread. - Thread* winner = nullptr; - std::set<s32> sug_cores; - for (auto thread : suggested_queue[core]) { - s32 this_core = thread->GetProcessorID(); - Thread* thread_on_core = nullptr; - if (this_core >= 0) { - thread_on_core = scheduled_queue[this_core].front(); - } - if (this_core < 0 || thread != thread_on_core) { - winner = thread; - break; + std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{}; + + u32 idle_cores{}; + + // Step 1: Get top thread in schedule queue. + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + Thread* top_thread = + scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); + if (top_thread != nullptr) { + // TODO(Blinkhawk): Implement Thread Pinning + } else { + idle_cores |= (1ul << core); } - sug_cores.insert(this_core); + top_threads[core] = top_thread; } - // if we got a suggested thread, select it, else do a second pass. - if (winner && winner->GetPriority() > 2) { - if (winner->IsRunning()) { - UnloadThread(static_cast<u32>(winner->GetProcessorID())); + + while (idle_cores != 0) { + u32 core_id = Common::CountTrailingZeroes32(idle_cores); + + if (!suggested_queue[core_id].empty()) { + std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{}; + std::size_t num_candidates = 0; + auto iter = suggested_queue[core_id].begin(); + Thread* suggested = nullptr; + // Step 2: Try selecting a suggested thread. + while (iter != suggested_queue[core_id].end()) { + suggested = *iter; + iter++; + s32 suggested_core_id = suggested->GetProcessorID(); + Thread* top_thread = + suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr; + if (top_thread != suggested) { + if (top_thread != nullptr && + top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) { + suggested = nullptr; + break; + // There's a too high thread to do core migration, cancel + } + TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested); + break; + } + suggested = nullptr; + migration_candidates[num_candidates++] = suggested_core_id; + } + // Step 3: Select a suggested thread from another core + if (suggested == nullptr) { + for (std::size_t i = 0; i < num_candidates; i++) { + s32 candidate_core = migration_candidates[i]; + suggested = top_threads[candidate_core]; + auto it = scheduled_queue[candidate_core].begin(); + it++; + Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr; + if (next != nullptr) { + TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), + suggested); + top_threads[candidate_core] = next; + break; + } else { + suggested = nullptr; + } + } + } + top_threads[core_id] = suggested; } - TransferToCore(winner->GetPriority(), static_cast<s32>(core), winner); - update_thread(winner, sched); - return; + + idle_cores &= ~(1ul << core_id); } - // Step 3: Select a suggested thread from another core - for (auto& src_core : sug_cores) { - auto it = scheduled_queue[src_core].begin(); - it++; - if (it != scheduled_queue[src_core].end()) { - Thread* thread_on_core = scheduled_queue[src_core].front(); - Thread* to_change = *it; - if (thread_on_core->IsRunning() || to_change->IsRunning()) { - UnloadThread(static_cast<u32>(src_core)); - } - TransferToCore(thread_on_core->GetPriority(), static_cast<s32>(core), thread_on_core); - current_thread = thread_on_core; - break; + u32 cores_needing_context_switch{}; + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + Scheduler& sched = kernel.Scheduler(core); + ASSERT(top_threads[core] == nullptr || top_threads[core]->GetProcessorID() == core); + if (update_thread(top_threads[core], sched)) { + cores_needing_context_switch |= (1ul << core); } } - update_thread(current_thread, sched); + return cores_needing_context_switch; } bool GlobalScheduler::YieldThread(Thread* yielding_thread) { + ASSERT(is_locked); // Note: caller should use critical section, etc. + if (!yielding_thread->IsRunnable()) { + // Normally this case shouldn't happen except for SetThreadActivity. + is_reselection_pending.store(true, std::memory_order_release); + return false; + } const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); const u32 priority = yielding_thread->GetPriority(); // Yield the thread - const Thread* const winner = scheduled_queue[core_id].front(priority); - ASSERT_MSG(yielding_thread == winner, "Thread yielding without being in front"); - scheduled_queue[core_id].yield(priority); + Reschedule(priority, core_id, yielding_thread); + const Thread* const winner = scheduled_queue[core_id].front(); + if (kernel.GetCurrentHostThreadID() != core_id) { + is_reselection_pending.store(true, std::memory_order_release); + } return AskForReselectionOrMarkRedundant(yielding_thread, winner); } bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { + ASSERT(is_locked); // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. + if (!yielding_thread->IsRunnable()) { + // Normally this case shouldn't happen except for SetThreadActivity. + is_reselection_pending.store(true, std::memory_order_release); + return false; + } const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); const u32 priority = yielding_thread->GetPriority(); // Yield the thread - ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), - "Thread yielding without being in front"); - scheduled_queue[core_id].yield(priority); + Reschedule(priority, core_id, yielding_thread); std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; for (std::size_t i = 0; i < current_threads.size(); i++) { @@ -153,21 +203,28 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) { - UnloadThread(static_cast<u32>(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), s32(core_id), winner); } } else { winner = next_thread; } + if (kernel.GetCurrentHostThreadID() != core_id) { + is_reselection_pending.store(true, std::memory_order_release); + } + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { + ASSERT(is_locked); // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. + if (!yielding_thread->IsRunnable()) { + // Normally this case shouldn't happen except for SetThreadActivity. + is_reselection_pending.store(true, std::memory_order_release); + return false; + } Thread* winner = nullptr; const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); @@ -195,25 +252,31 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) { - UnloadThread(static_cast<u32>(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner); } } else { winner = yielding_thread; } + } else { + winner = scheduled_queue[core_id].front(); + } + + if (kernel.GetCurrentHostThreadID() != core_id) { + is_reselection_pending.store(true, std::memory_order_release); } return AskForReselectionOrMarkRedundant(yielding_thread, winner); } void GlobalScheduler::PreemptThreads() { + ASSERT(is_locked); for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { const u32 priority = preemption_priorities[core_id]; if (scheduled_queue[core_id].size(priority) > 0) { - scheduled_queue[core_id].front(priority)->IncrementYieldCount(); + if (scheduled_queue[core_id].size(priority) > 1) { + scheduled_queue[core_id].front(priority)->IncrementYieldCount(); + } scheduled_queue[core_id].yield(priority); if (scheduled_queue[core_id].size(priority) > 1) { scheduled_queue[core_id].front(priority)->IncrementYieldCount(); @@ -247,9 +310,6 @@ void GlobalScheduler::PreemptThreads() { } if (winner != nullptr) { - if (winner->IsRunning()) { - UnloadThread(static_cast<u32>(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), s32(core_id), winner); current_thread = winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; @@ -280,9 +340,6 @@ void GlobalScheduler::PreemptThreads() { } if (winner != nullptr) { - if (winner->IsRunning()) { - UnloadThread(static_cast<u32>(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), s32(core_id), winner); current_thread = winner; } @@ -292,34 +349,65 @@ void GlobalScheduler::PreemptThreads() { } } +void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule, + Core::EmuThreadHandle global_thread) { + u32 current_core = global_thread.host_handle; + bool must_context_switch = global_thread.guest_handle != InvalidHandle && + (current_core < Core::Hardware::NUM_CPU_CORES); + while (cores_pending_reschedule != 0) { + u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + if (!must_context_switch || core != current_core) { + auto& phys_core = kernel.PhysicalCore(core); + phys_core.Interrupt(); + } else { + must_context_switch = true; + } + cores_pending_reschedule &= ~(1ul << core); + } + if (must_context_switch) { + auto& core_scheduler = kernel.CurrentScheduler(); + kernel.ExitSVCProfile(); + core_scheduler.TryDoContextSwitch(); + kernel.EnterSVCProfile(); + } +} + void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) { + ASSERT(is_locked); suggested_queue[core].add(thread, priority); } void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) { + ASSERT(is_locked); suggested_queue[core].remove(thread, priority); } void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) { + ASSERT(is_locked); ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); } void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) { + ASSERT(is_locked); ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority, false); } void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) { + ASSERT(is_locked); scheduled_queue[core].remove(thread, priority); scheduled_queue[core].add(thread, priority); } void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) { + ASSERT(is_locked); scheduled_queue[core].remove(thread, priority); } void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) { + ASSERT(is_locked); const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; const s32 source_core = thread->GetProcessorID(); if (source_core == destination_core || !schedulable) { @@ -349,6 +437,108 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, } } +void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) { + if (old_flags == thread->scheduling_state) { + return; + } + ASSERT(is_locked); + + if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // In this case the thread was running, now it's pausing/exitting + if (thread->processor_id >= 0) { + Unschedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast<u32>(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Unsuggest(thread->current_priority, core, thread); + } + } + } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // The thread is now set to running from being stopped + if (thread->processor_id >= 0) { + Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast<u32>(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Suggest(thread->current_priority, core, thread); + } + } + } + + SetReselectionPending(); +} + +void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) { + if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) { + return; + } + ASSERT(is_locked); + if (thread->processor_id >= 0) { + Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread); + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast<u32>(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Unsuggest(old_priority, core, thread); + } + } + + if (thread->processor_id >= 0) { + if (thread == kernel.CurrentScheduler().GetCurrentThread()) { + SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id), + thread); + } else { + Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); + } + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast<u32>(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Suggest(thread->current_priority, core, thread); + } + } + thread->IncrementYieldCount(); + SetReselectionPending(); +} + +void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, + s32 old_core) { + if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) || + thread->current_priority >= THREADPRIO_COUNT) { + return; + } + ASSERT(is_locked); + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (((old_affinity_mask >> core) & 1) != 0) { + if (core == static_cast<u32>(old_core)) { + Unschedule(thread->current_priority, core, thread); + } else { + Unsuggest(thread->current_priority, core, thread); + } + } + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (((thread->affinity_mask >> core) & 1) != 0) { + if (core == static_cast<u32>(thread->processor_id)) { + Schedule(thread->current_priority, core, thread); + } else { + Suggest(thread->current_priority, core, thread); + } + } + } + + thread->IncrementYieldCount(); + SetReselectionPending(); +} + void GlobalScheduler::Shutdown() { for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { scheduled_queue[core].clear(); @@ -359,10 +549,12 @@ void GlobalScheduler::Shutdown() { void GlobalScheduler::Lock() { Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID(); + ASSERT(!current_thread.IsInvalid()); if (current_thread == current_owner) { ++scope_lock; } else { inner_lock.lock(); + is_locked = true; current_owner = current_thread; ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle()); scope_lock = 1; @@ -374,17 +566,18 @@ void GlobalScheduler::Unlock() { ASSERT(scope_lock > 0); return; } - for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - SelectThread(i); - } + u32 cores_pending_reschedule = SelectThreads(); + Core::EmuThreadHandle leaving_thread = current_owner; current_owner = Core::EmuThreadHandle::InvalidHandle(); scope_lock = 1; + is_locked = false; inner_lock.unlock(); - // TODO(Blinkhawk): Setup the interrupts and change context on current core. + EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread); } -Scheduler::Scheduler(Core::System& system, std::size_t core_id) - : system{system}, core_id{core_id} {} +Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) { + switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this); +} Scheduler::~Scheduler() = default; @@ -393,56 +586,128 @@ bool Scheduler::HaveReadyThreads() const { } Thread* Scheduler::GetCurrentThread() const { - return current_thread.get(); + if (current_thread) { + return current_thread.get(); + } + return idle_thread.get(); } Thread* Scheduler::GetSelectedThread() const { return selected_thread.get(); } -void Scheduler::SelectThreads() { - system.GlobalScheduler().SelectThread(core_id); -} - u64 Scheduler::GetLastContextSwitchTicks() const { return last_context_switch_time; } void Scheduler::TryDoContextSwitch() { + auto& phys_core = system.Kernel().CurrentPhysicalCore(); + if (phys_core.IsInterrupted()) { + phys_core.ClearInterrupt(); + } + guard.lock(); if (is_context_switch_pending) { SwitchContext(); + } else { + guard.unlock(); } } -void Scheduler::UnloadThread() { - Thread* const previous_thread = GetCurrentThread(); - Process* const previous_process = system.Kernel().CurrentProcess(); +void Scheduler::OnThreadStart() { + SwitchContextStep2(); +} - UpdateLastContextSwitchTime(previous_thread, previous_process); +void Scheduler::Unload() { + Thread* thread = current_thread.get(); + if (thread) { + thread->SetContinuousOnSVC(false); + thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); + thread->SetIsRunning(false); + if (!thread->IsHLEThread() && !thread->HasExited()) { + Core::ARM_Interface& cpu_core = thread->ArmInterface(); + cpu_core.SaveContext(thread->GetContext32()); + cpu_core.SaveContext(thread->GetContext64()); + // Save the TPIDR_EL0 system register in case it was modified. + thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); + } + thread->context_guard.unlock(); + } +} - // Save context for previous thread - if (previous_thread) { - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); +void Scheduler::Reload() { + Thread* thread = current_thread.get(); + if (thread) { + ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, + "Thread must be runnable."); + + // Cancel any outstanding wakeup events for this thread + thread->SetIsRunning(true); + thread->SetWasRunning(false); + thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); - if (previous_thread->GetStatus() == ThreadStatus::Running) { - // This is only the case when a reschedule is triggered without the current thread - // yielding execution (i.e. an event triggered, system core time-sliced, etc) - previous_thread->SetStatus(ThreadStatus::Ready); + auto* const thread_owner_process = thread->GetOwnerProcess(); + if (thread_owner_process != nullptr) { + system.Kernel().MakeCurrentProcess(thread_owner_process); + } + if (!thread->IsHLEThread()) { + Core::ARM_Interface& cpu_core = thread->ArmInterface(); + cpu_core.LoadContext(thread->GetContext32()); + cpu_core.LoadContext(thread->GetContext64()); + cpu_core.SetTlsAddress(thread->GetTLSAddress()); + cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); + cpu_core.ChangeProcessorID(this->core_id); + cpu_core.ClearExclusiveState(); } - previous_thread->SetIsRunning(false); } - current_thread = nullptr; +} + +void Scheduler::SwitchContextStep2() { + Thread* previous_thread = current_thread_prev.get(); + Thread* new_thread = selected_thread.get(); + + // Load context of new thread + Process* const previous_process = + previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr; + + if (new_thread) { + ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, + "Thread must be runnable."); + + // Cancel any outstanding wakeup events for this thread + new_thread->SetIsRunning(true); + new_thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); + new_thread->SetWasRunning(false); + + auto* const thread_owner_process = current_thread->GetOwnerProcess(); + if (thread_owner_process != nullptr) { + system.Kernel().MakeCurrentProcess(thread_owner_process); + } + if (!new_thread->IsHLEThread()) { + Core::ARM_Interface& cpu_core = new_thread->ArmInterface(); + cpu_core.LoadContext(new_thread->GetContext32()); + cpu_core.LoadContext(new_thread->GetContext64()); + cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); + cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); + cpu_core.ChangeProcessorID(this->core_id); + cpu_core.ClearExclusiveState(); + } + } + + TryDoContextSwitch(); } void Scheduler::SwitchContext() { - Thread* const previous_thread = GetCurrentThread(); - Thread* const new_thread = GetSelectedThread(); + current_thread_prev = current_thread; + selected_thread = selected_thread_set; + Thread* previous_thread = current_thread_prev.get(); + Thread* new_thread = selected_thread.get(); + current_thread = selected_thread; is_context_switch_pending = false; + if (new_thread == previous_thread) { + guard.unlock(); return; } @@ -452,51 +717,76 @@ void Scheduler::SwitchContext() { // Save context for previous thread if (previous_thread) { - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); - - if (previous_thread->GetStatus() == ThreadStatus::Running) { - // This is only the case when a reschedule is triggered without the current thread - // yielding execution (i.e. an event triggered, system core time-sliced, etc) - previous_thread->SetStatus(ThreadStatus::Ready); + if (new_thread != nullptr && new_thread->IsSuspendThread()) { + previous_thread->SetWasRunning(true); } + previous_thread->SetContinuousOnSVC(false); + previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); previous_thread->SetIsRunning(false); + if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) { + Core::ARM_Interface& cpu_core = previous_thread->ArmInterface(); + cpu_core.SaveContext(previous_thread->GetContext32()); + cpu_core.SaveContext(previous_thread->GetContext64()); + // Save the TPIDR_EL0 system register in case it was modified. + previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); + } + previous_thread->context_guard.unlock(); } - // Load context of new thread - if (new_thread) { - ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id), - "Thread must be assigned to this core."); - ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, - "Thread must be ready to become running."); + std::shared_ptr<Common::Fiber>* old_context; + if (previous_thread != nullptr) { + old_context = &previous_thread->GetHostContext(); + } else { + old_context = &idle_thread->GetHostContext(); + } + guard.unlock(); - // Cancel any outstanding wakeup events for this thread - new_thread->CancelWakeupTimer(); - current_thread = SharedFrom(new_thread); - new_thread->SetStatus(ThreadStatus::Running); - new_thread->SetIsRunning(true); + Common::Fiber::YieldTo(*old_context, switch_fiber); + /// When a thread wakes up, the scheduler may have changed to other in another core. + auto& next_scheduler = system.Kernel().CurrentScheduler(); + next_scheduler.SwitchContextStep2(); +} - auto* const thread_owner_process = current_thread->GetOwnerProcess(); - if (previous_process != thread_owner_process) { - system.Kernel().MakeCurrentProcess(thread_owner_process); - } +void Scheduler::OnSwitch(void* this_scheduler) { + Scheduler* sched = static_cast<Scheduler*>(this_scheduler); + sched->SwitchToCurrent(); +} - system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); - system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); - system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); - system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); - } else { - current_thread = nullptr; - // Note: We do not reset the current process and current page table when idling because - // technically we haven't changed processes, our threads are just paused. +void Scheduler::SwitchToCurrent() { + while (true) { + { + std::scoped_lock lock{guard}; + selected_thread = selected_thread_set; + current_thread = selected_thread; + is_context_switch_pending = false; + } + while (!is_context_switch_pending) { + if (current_thread != nullptr && !current_thread->IsHLEThread()) { + current_thread->context_guard.lock(); + if (!current_thread->IsRunnable()) { + current_thread->context_guard.unlock(); + break; + } + if (current_thread->GetProcessorID() != core_id) { + current_thread->context_guard.unlock(); + break; + } + } + std::shared_ptr<Common::Fiber>* next_context; + if (current_thread != nullptr) { + next_context = ¤t_thread->GetHostContext(); + } else { + next_context = &idle_thread->GetHostContext(); + } + Common::Fiber::YieldTo(switch_fiber, *next_context); + } } } void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); + const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { @@ -510,6 +800,16 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { last_context_switch_time = most_recent_switch_ticks; } +void Scheduler::Initialize() { + std::string name = "Idle Thread Id:" + std::to_string(core_id); + std::function<void(void*)> init_func = system.GetCpuManager().GetIdleThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE); + auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0, + nullptr, std::move(init_func), init_func_parameter); + idle_thread = std::move(thread_res).Unwrap(); +} + void Scheduler::Shutdown() { current_thread = nullptr; selected_thread = nullptr; @@ -538,4 +838,13 @@ SchedulerLockAndSleep::~SchedulerLockAndSleep() { time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); } +void SchedulerLockAndSleep::Release() { + if (sleep_cancelled) { + return; + } + auto& time_manager = kernel.TimeManager(); + time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); + sleep_cancelled = true; +} + } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 07df33f9c..b3b4b5169 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -11,9 +11,14 @@ #include "common/common_types.h" #include "common/multi_level_queue.h" +#include "common/spin_lock.h" #include "core/hardware_properties.h" #include "core/hle/kernel/thread.h" +namespace Common { +class Fiber; +} + namespace Core { class ARM_Interface; class System; @@ -41,41 +46,17 @@ public: return thread_list; } - /** - * Add a thread to the suggested queue of a cpu core. Suggested threads may be - * picked if no thread is scheduled to run on the core. - */ - void Suggest(u32 priority, std::size_t core, Thread* thread); - - /** - * Remove a thread to the suggested queue of a cpu core. Suggested threads may be - * picked if no thread is scheduled to run on the core. - */ - void Unsuggest(u32 priority, std::size_t core, Thread* thread); - - /** - * Add a thread to the scheduling queue of a cpu core. The thread is added at the - * back the queue in its priority level. - */ - void Schedule(u32 priority, std::size_t core, Thread* thread); - - /** - * Add a thread to the scheduling queue of a cpu core. The thread is added at the - * front the queue in its priority level. - */ - void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); + /// Notify the scheduler a thread's status has changed. + void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags); - /// Reschedule an already scheduled thread based on a new priority - void Reschedule(u32 priority, std::size_t core, Thread* thread); - - /// Unschedules a thread. - void Unschedule(u32 priority, std::size_t core, Thread* thread); + /// Notify the scheduler a thread's priority has changed. + void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority); - /// Selects a core and forces it to unload its current thread's context - void UnloadThread(std::size_t core); + /// Notify the scheduler a thread's core and/or affinity mask has changed. + void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core); /** - * Takes care of selecting the new scheduled thread in three steps: + * Takes care of selecting the new scheduled threads in three steps: * * 1. First a thread is selected from the top of the priority queue. If no thread * is obtained then we move to step two, else we are done. @@ -85,8 +66,10 @@ public: * * 3. Third is no suggested thread is found, we do a second pass and pick a running * thread in another core and swap it with its current thread. + * + * returns the cores needing scheduling. */ - void SelectThread(std::size_t core); + u32 SelectThreads(); bool HaveReadyThreads(std::size_t core_id) const { return !scheduled_queue[core_id].empty(); @@ -149,6 +132,40 @@ private: /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling /// and reschedules current core if needed. void Unlock(); + + void EnableInterruptAndSchedule(u32 cores_pending_reschedule, + Core::EmuThreadHandle global_thread); + + /** + * Add a thread to the suggested queue of a cpu core. Suggested threads may be + * picked if no thread is scheduled to run on the core. + */ + void Suggest(u32 priority, std::size_t core, Thread* thread); + + /** + * Remove a thread to the suggested queue of a cpu core. Suggested threads may be + * picked if no thread is scheduled to run on the core. + */ + void Unsuggest(u32 priority, std::size_t core, Thread* thread); + + /** + * Add a thread to the scheduling queue of a cpu core. The thread is added at the + * back the queue in its priority level. + */ + void Schedule(u32 priority, std::size_t core, Thread* thread); + + /** + * Add a thread to the scheduling queue of a cpu core. The thread is added at the + * front the queue in its priority level. + */ + void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); + + /// Reschedule an already scheduled thread based on a new priority + void Reschedule(u32 priority, std::size_t core, Thread* thread); + + /// Unschedules a thread. + void Unschedule(u32 priority, std::size_t core, Thread* thread); + /** * Transfers a thread into an specific core. If the destination_core is -1 * it will be unscheduled from its source code and added into its suggested @@ -170,10 +187,13 @@ private: std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; /// Scheduler lock mechanisms. - std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock + bool is_locked{}; + Common::SpinLock inner_lock{}; std::atomic<s64> scope_lock{}; Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; + Common::SpinLock global_list_guard{}; + /// Lists all thread ids that aren't deleted/etc. std::vector<std::shared_ptr<Thread>> thread_list; KernelCore& kernel; @@ -190,11 +210,11 @@ public: /// Reschedules to the next available thread (call after current thread is suspended) void TryDoContextSwitch(); - /// Unloads currently running thread - void UnloadThread(); - - /// Select the threads in top of the scheduling multilist. - void SelectThreads(); + /// The next two are for SingleCore Only. + /// Unload current thread before preempting core. + void Unload(); + /// Reload current thread after core preemption. + void Reload(); /// Gets the current running thread Thread* GetCurrentThread() const; @@ -209,15 +229,30 @@ public: return is_context_switch_pending; } + void Initialize(); + /// Shutdowns the scheduler. void Shutdown(); + void OnThreadStart(); + + std::shared_ptr<Common::Fiber>& ControlContext() { + return switch_fiber; + } + + const std::shared_ptr<Common::Fiber>& ControlContext() const { + return switch_fiber; + } + private: friend class GlobalScheduler; /// Switches the CPU's active thread context to that of the specified thread void SwitchContext(); + /// When a thread wakes up, it must run this through it's new scheduler + void SwitchContextStep2(); + /** * Called on every context switch to update the internal timestamp * This also updates the running time ticks for the given thread and @@ -231,14 +266,24 @@ private: */ void UpdateLastContextSwitchTime(Thread* thread, Process* process); + static void OnSwitch(void* this_scheduler); + void SwitchToCurrent(); + std::shared_ptr<Thread> current_thread = nullptr; std::shared_ptr<Thread> selected_thread = nullptr; + std::shared_ptr<Thread> current_thread_prev = nullptr; + std::shared_ptr<Thread> selected_thread_set = nullptr; + std::shared_ptr<Thread> idle_thread = nullptr; + + std::shared_ptr<Common::Fiber> switch_fiber = nullptr; Core::System& system; u64 last_context_switch_time = 0; u64 idle_selection_count = 0; const std::size_t core_id; + Common::SpinLock guard{}; + bool is_context_switch_pending = false; }; @@ -261,6 +306,8 @@ public: sleep_cancelled = true; } + void Release(); + private: Handle& event_handle; Thread* time_task; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 25438b86b..7b23a6889 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -17,6 +17,7 @@ #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/session.h" #include "core/hle/kernel/thread.h" @@ -168,9 +169,12 @@ ResultCode ServerSession::CompleteSyncRequest() { } // Some service requests require the thread to block - if (!context.IsThreadWaiting()) { - context.GetThread().ResumeFromWait(); - context.GetThread().SetWaitSynchronizationResult(result); + { + SchedulerLock lock(kernel); + if (!context.IsThreadWaiting()) { + context.GetThread().ResumeFromWait(); + context.GetThread().SetSynchronizationResults(nullptr, result); + } } request_queue.Pop(); @@ -180,8 +184,10 @@ ResultCode ServerSession::CompleteSyncRequest() { ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory) { - Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {}); - return QueueSyncRequest(std::move(thread), memory); + ResultCode result = QueueSyncRequest(std::move(thread), memory); + const u64 delay = kernel.IsMulticore() ? 0U : 20000U; + Core::System::GetInstance().CoreTiming().ScheduleEvent(delay, request_event, {}); + return result; } } // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 4ae4529f5..5db19dcf3 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -10,14 +10,15 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/fiber.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/cpu_manager.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" @@ -27,6 +28,7 @@ #include "core/hle/kernel/memory/memory_block.h" #include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/mutex.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/resource_limit.h" @@ -37,6 +39,7 @@ #include "core/hle/kernel/svc_wrap.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" #include "core/hle/kernel/transfer_memory.h" #include "core/hle/kernel/writable_event.h" #include "core/hle/lock.h" @@ -133,6 +136,7 @@ enum class ResourceLimitValueType { ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, u32 resource_type, ResourceLimitValueType value_type) { + std::lock_guard lock{HLE::g_hle_lock}; const auto type = static_cast<ResourceType>(resource_type); if (!IsValidResourceType(type)) { LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); @@ -160,6 +164,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_ /// Set the process heap to a given Size. It can both extend and shrink the heap. static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size); // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB. @@ -190,6 +195,7 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask, u32 attribute) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_DEBUG(Kernel_SVC, "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address, size, mask, attribute); @@ -226,8 +232,15 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si static_cast<Memory::MemoryAttribute>(attribute)); } +static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask, + u32 attribute) { + return SetMemoryAttribute(system, static_cast<VAddr>(address), static_cast<std::size_t>(size), + mask, attribute); +} + /// Maps a memory range into a different range. static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); @@ -241,8 +254,14 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr return page_table.Map(dst_addr, src_addr, size); } +static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { + return MapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), + static_cast<std::size_t>(size)); +} + /// Unmaps a region that was previously mapped with svcMapMemory static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); @@ -256,9 +275,15 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad return page_table.Unmap(dst_addr, src_addr, size); } +static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { + return UnmapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), + static_cast<std::size_t>(size)); +} + /// Connect to an OS service given the port name, returns the handle to the port to out static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, VAddr port_name_address) { + std::lock_guard lock{HLE::g_hle_lock}; auto& memory = system.Memory(); if (!memory.IsValidVirtualAddress(port_name_address)) { @@ -317,11 +342,30 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); auto thread = system.CurrentScheduler().GetCurrentThread(); - thread->InvalidateWakeupCallback(); - thread->SetStatus(ThreadStatus::WaitIPC); - system.PrepareReschedule(thread->GetProcessorID()); + { + SchedulerLock lock(system.Kernel()); + thread->InvalidateHLECallback(); + thread->SetStatus(ThreadStatus::WaitIPC); + session->SendSyncRequest(SharedFrom(thread), system.Memory()); + } + + if (thread->HasHLECallback()) { + Handle event_handle = thread->GetHLETimeEvent(); + if (event_handle != InvalidHandle) { + auto& time_manager = system.Kernel().TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); + } + + { + SchedulerLock lock(system.Kernel()); + auto* sync_object = thread->GetHLESyncObject(); + sync_object->RemoveWaitingThread(SharedFrom(thread)); + } + + thread->InvokeHLECallback(SharedFrom(thread)); + } - return session->SendSyncRequest(SharedFrom(thread), system.Memory()); + return thread->GetSignalingResult(); } static ResultCode SendSyncRequest32(Core::System& system, Handle handle) { @@ -383,6 +427,15 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han return ERR_INVALID_HANDLE; } +static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, + Handle handle) { + u64 process_id{}; + const auto result = GetProcessId(system, &process_id, handle); + *process_id_low = static_cast<u32>(process_id); + *process_id_high = static_cast<u32>(process_id >> 32); + return result; +} + /// Wait for the given handles to synchronize, timeout after the specified nanoseconds static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address, u64 handle_count, s64 nano_seconds) { @@ -447,10 +500,13 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand } thread->CancelWait(); - system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } +static ResultCode CancelSynchronization32(Core::System& system, Handle thread_handle) { + return CancelSynchronization(system, thread_handle); +} + /// Attempts to locks a mutex, creating it if it does not already exist static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle, VAddr mutex_addr, Handle requesting_thread_handle) { @@ -475,6 +531,12 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand requesting_thread_handle); } +static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle, + u32 mutex_addr, Handle requesting_thread_handle) { + return ArbitrateLock(system, holding_thread_handle, static_cast<VAddr>(mutex_addr), + requesting_thread_handle); +} + /// Unlock a mutex static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); @@ -494,6 +556,10 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { return current_process->GetMutex().Release(mutex_addr); } +static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) { + return ArbitrateUnlock(system, static_cast<VAddr>(mutex_addr)); +} + enum class BreakType : u32 { Panic = 0, AssertionFailed = 1, @@ -594,6 +660,7 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt); if (!break_reason.signal_debugger) { + SchedulerLock lock(system.Kernel()); LOG_CRITICAL( Debug_Emulated, "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", @@ -605,14 +672,16 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { const auto thread_processor_id = current_thread->GetProcessorID(); system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); - system.Kernel().CurrentProcess()->PrepareForTermination(); - // Kill the current thread + system.Kernel().ExceptionalExit(); current_thread->Stop(); - system.PrepareReschedule(); } } +static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) { + Break(system, reason, static_cast<u64>(info1), static_cast<u64>(info2)); +} + /// Used to output a message on a debug hardware unit - does nothing on a retail unit static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) { if (len == 0) { @@ -627,6 +696,7 @@ static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr addre /// Gets system/memory information for the current process static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle, u64 info_sub_id) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id, info_sub_id, handle); @@ -863,9 +933,9 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); - out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks); + out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { - out_ticks = core_timing.GetTicks() - prev_ctx_ticks; + out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; } *result = out_ticks; @@ -892,6 +962,7 @@ static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_h /// Maps memory at a desired address static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); if (!Common::Is4KBAligned(addr)) { @@ -939,8 +1010,13 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) return page_table.MapPhysicalMemory(addr, size); } +static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { + return MapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); +} + /// Unmaps memory previously mapped via MapPhysicalMemory static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); if (!Common::Is4KBAligned(addr)) { @@ -988,6 +1064,10 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size return page_table.UnmapPhysicalMemory(addr, size); } +static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { + return UnmapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); +} + /// Sets the thread activity static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); @@ -1017,10 +1097,11 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act return ERR_BUSY; } - thread->SetActivity(static_cast<ThreadActivity>(activity)); + return thread->SetActivity(static_cast<ThreadActivity>(activity)); +} - system.PrepareReschedule(thread->GetProcessorID()); - return RESULT_SUCCESS; +static ResultCode SetThreadActivity32(Core::System& system, Handle handle, u32 activity) { + return SetThreadActivity(system, handle, activity); } /// Gets the thread context @@ -1064,6 +1145,10 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H return RESULT_SUCCESS; } +static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) { + return GetThreadContext(system, static_cast<VAddr>(thread_context), handle); +} + /// Gets the priority for the specified thread static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) { LOG_TRACE(Kernel_SVC, "called"); @@ -1071,6 +1156,7 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const std::shared_ptr<Thread> thread = handle_table.Get<Thread>(handle); if (!thread) { + *priority = 0; LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); return ERR_INVALID_HANDLE; } @@ -1105,18 +1191,26 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri thread->SetPriority(priority); - system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } +static ResultCode SetThreadPriority32(Core::System& system, Handle handle, u32 priority) { + return SetThreadPriority(system, handle, priority); +} + /// Get which CPU core is executing the current thread static u32 GetCurrentProcessorNumber(Core::System& system) { LOG_TRACE(Kernel_SVC, "called"); - return system.CurrentScheduler().GetCurrentThread()->GetProcessorID(); + return static_cast<u32>(system.CurrentPhysicalCore().CoreIndex()); +} + +static u32 GetCurrentProcessorNumber32(Core::System& system) { + return GetCurrentProcessorNumber(system); } static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr, u64 size, u32 permissions) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_TRACE(Kernel_SVC, "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}", shared_memory_handle, addr, size, permissions); @@ -1187,9 +1281,16 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han return shared_memory->Map(*current_process, addr, size, permission_type); } +static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr, + u32 size, u32 permissions) { + return MapSharedMemory(system, shared_memory_handle, static_cast<VAddr>(addr), + static_cast<std::size_t>(size), permissions); +} + static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address, VAddr page_info_address, Handle process_handle, VAddr address) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address); const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); std::shared_ptr<Process> process = handle_table.Get<Process>(process_handle); @@ -1372,6 +1473,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha /// Exits the current process static void ExitProcess(Core::System& system) { auto* current_process = system.Kernel().CurrentProcess(); + UNIMPLEMENTED(); LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID()); ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, @@ -1381,8 +1483,10 @@ static void ExitProcess(Core::System& system) { // Kill the current thread system.CurrentScheduler().GetCurrentThread()->Stop(); +} - system.PrepareReschedule(); +static void ExitProcess32(Core::System& system) { + ExitProcess(system); } /// Creates a new thread @@ -1428,9 +1532,10 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1)); + ThreadType type = THREADTYPE_USER; CASCADE_RESULT(std::shared_ptr<Thread> thread, - Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top, - *current_process)); + Thread::Create(system, type, "", entry_point, priority, arg, processor_id, + stack_top, current_process)); const auto new_thread_handle = current_process->GetHandleTable().Create(thread); if (new_thread_handle.Failed()) { @@ -1444,11 +1549,15 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e thread->SetName( fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); - system.PrepareReschedule(thread->GetProcessorID()); - return RESULT_SUCCESS; } +static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority, + u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) { + return CreateThread(system, out_handle, static_cast<VAddr>(entry_point), static_cast<u64>(arg), + static_cast<VAddr>(stack_top), priority, processor_id); +} + /// Starts the thread for the provided handle static ResultCode StartThread(Core::System& system, Handle thread_handle) { LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle); @@ -1463,13 +1572,11 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { ASSERT(thread->GetStatus() == ThreadStatus::Dormant); - thread->ResumeFromWait(); - - if (thread->GetStatus() == ThreadStatus::Ready) { - system.PrepareReschedule(thread->GetProcessorID()); - } + return thread->Start(); +} - return RESULT_SUCCESS; +static ResultCode StartThread32(Core::System& system, Handle thread_handle) { + return StartThread(system, thread_handle); } /// Called when a thread exits @@ -1477,9 +1584,12 @@ static void ExitThread(Core::System& system) { LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); - current_thread->Stop(); system.GlobalScheduler().RemoveThread(SharedFrom(current_thread)); - system.PrepareReschedule(); + current_thread->Stop(); +} + +static void ExitThread32(Core::System& system) { + ExitThread(system); } /// Sleep the current thread @@ -1498,15 +1608,21 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { if (nanoseconds <= 0) { switch (static_cast<SleepType>(nanoseconds)) { - case SleepType::YieldWithoutLoadBalancing: - is_redundant = current_thread->YieldSimple(); + case SleepType::YieldWithoutLoadBalancing: { + auto pair = current_thread->YieldSimple(); + is_redundant = pair.second; break; - case SleepType::YieldWithLoadBalancing: - is_redundant = current_thread->YieldAndBalanceLoad(); + } + case SleepType::YieldWithLoadBalancing: { + auto pair = current_thread->YieldAndBalanceLoad(); + is_redundant = pair.second; break; - case SleepType::YieldAndWaitForLoadBalancing: - is_redundant = current_thread->YieldAndWaitForLoadBalancing(); + } + case SleepType::YieldAndWaitForLoadBalancing: { + auto pair = current_thread->YieldAndWaitForLoadBalancing(); + is_redundant = pair.second; break; + } default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); } @@ -1514,13 +1630,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - if (is_redundant) { - // If it's redundant, the core is pretty much idle. Some games keep idling - // a core while it's doing nothing, we advance timing to avoid costly continuous - // calls. - system.CoreTiming().AddTicks(2000); + if (is_redundant && !system.Kernel().IsMulticore()) { + system.Kernel().ExitSVCProfile(); + system.CoreTiming().AddTicks(1000U); + system.GetCpuManager().PreemptSingleCore(); + system.Kernel().EnterSVCProfile(); } - system.PrepareReschedule(current_thread->GetProcessorID()); +} + +static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) { + const s64 nanoseconds = static_cast<s64>(static_cast<u64>(nanoseconds_low) | + (static_cast<u64>(nanoseconds_high) << 32)); + SleepThread(system, nanoseconds); } /// Wait process wide key atomic @@ -1547,31 +1668,69 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add } ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); - + auto& kernel = system.Kernel(); + Handle event_handle; + Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); auto* const current_process = system.Kernel().CurrentProcess(); - const auto& handle_table = current_process->GetHandleTable(); - std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); - ASSERT(thread); + { + SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds); + const auto& handle_table = current_process->GetHandleTable(); + std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); + ASSERT(thread); + + current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + + if (thread->IsPendingTermination()) { + lock.CancelSleep(); + return ERR_THREAD_TERMINATING; + } + + const auto release_result = current_process->GetMutex().Release(mutex_addr); + if (release_result.IsError()) { + lock.CancelSleep(); + return release_result; + } + + if (nano_seconds == 0) { + lock.CancelSleep(); + return RESULT_TIMEOUT; + } - const auto release_result = current_process->GetMutex().Release(mutex_addr); - if (release_result.IsError()) { - return release_result; + current_thread->SetCondVarWaitAddress(condition_variable_addr); + current_thread->SetMutexWaitAddress(mutex_addr); + current_thread->SetWaitHandle(thread_handle); + current_thread->SetStatus(ThreadStatus::WaitCondVar); + current_process->InsertConditionVariableThread(SharedFrom(current_thread)); } - Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); - current_thread->SetCondVarWaitAddress(condition_variable_addr); - current_thread->SetMutexWaitAddress(mutex_addr); - current_thread->SetWaitHandle(thread_handle); - current_thread->SetStatus(ThreadStatus::WaitCondVar); - current_thread->InvalidateWakeupCallback(); - current_process->InsertConditionVariableThread(SharedFrom(current_thread)); + if (event_handle != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); + } + + { + SchedulerLock lock(kernel); - current_thread->WakeAfterDelay(nano_seconds); + auto* owner = current_thread->GetLockOwner(); + if (owner != nullptr) { + owner->RemoveMutexWaiter(SharedFrom(current_thread)); + } + current_process->RemoveConditionVariableThread(SharedFrom(current_thread)); + } // Note: Deliberately don't attempt to inherit the lock owner's priority. - system.PrepareReschedule(current_thread->GetProcessorID()); - return RESULT_SUCCESS; + return current_thread->GetSignalingResult(); +} + +static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr, + u32 condition_variable_addr, Handle thread_handle, + u32 nanoseconds_low, u32 nanoseconds_high) { + const s64 nanoseconds = + static_cast<s64>(nanoseconds_low | (static_cast<u64>(nanoseconds_high) << 32)); + return WaitProcessWideKeyAtomic(system, static_cast<VAddr>(mutex_addr), + static_cast<VAddr>(condition_variable_addr), thread_handle, + nanoseconds); } /// Signal process wide key @@ -1582,7 +1741,9 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); // Retrieve a list of all threads that are waiting for this condition variable. - auto* const current_process = system.Kernel().CurrentProcess(); + auto& kernel = system.Kernel(); + SchedulerLock lock(kernel); + auto* const current_process = kernel.CurrentProcess(); std::vector<std::shared_ptr<Thread>> waiting_threads = current_process->GetConditionVariableThreads(condition_variable_addr); @@ -1591,7 +1752,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ std::size_t last = waiting_threads.size(); if (target > 0) last = std::min(waiting_threads.size(), static_cast<std::size_t>(target)); - + auto& time_manager = kernel.TimeManager(); for (std::size_t index = 0; index < last; ++index) { auto& thread = waiting_threads[index]; @@ -1599,7 +1760,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ // liberate Cond Var Thread. current_process->RemoveConditionVariableThread(thread); - thread->SetCondVarWaitAddress(0); const std::size_t current_core = system.CurrentCoreIndex(); auto& monitor = system.Monitor(); @@ -1610,10 +1770,8 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ u32 update_val = 0; const VAddr mutex_address = thread->GetMutexWaitAddress(); do { - monitor.SetExclusive(current_core, mutex_address); - // If the mutex is not yet acquired, acquire it. - mutex_val = memory.Read32(mutex_address); + mutex_val = monitor.ExclusiveRead32(current_core, mutex_address); if (mutex_val != 0) { update_val = mutex_val | Mutex::MutexHasWaitersFlag; @@ -1621,33 +1779,28 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ update_val = thread->GetWaitHandle(); } } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val)); + monitor.ClearExclusive(); if (mutex_val == 0) { // We were able to acquire the mutex, resume this thread. - ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); - thread->ResumeFromWait(); - auto* const lock_owner = thread->GetLockOwner(); if (lock_owner != nullptr) { lock_owner->RemoveMutexWaiter(thread); } thread->SetLockOwner(nullptr); - thread->SetMutexWaitAddress(0); - thread->SetWaitHandle(0); - thread->SetWaitSynchronizationResult(RESULT_SUCCESS); - system.PrepareReschedule(thread->GetProcessorID()); + thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS); + thread->ResumeFromWait(); } else { // The mutex is already owned by some other thread, make this thread wait on it. const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); auto owner = handle_table.Get<Thread>(owner_handle); ASSERT(owner); - ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); - thread->InvalidateWakeupCallback(); - thread->SetStatus(ThreadStatus::WaitMutex); + if (thread->GetStatus() == ThreadStatus::WaitCondVar) { + thread->SetStatus(ThreadStatus::WaitMutex); + } owner->AddMutexWaiter(thread); - system.PrepareReschedule(thread->GetProcessorID()); } } } @@ -1678,12 +1831,15 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); const ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); - if (result == RESULT_SUCCESS) { - system.PrepareReschedule(); - } return result; } +static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value, + u32 timeout_low, u32 timeout_high) { + s64 timeout = static_cast<s64>(timeout_low | (static_cast<u64>(timeout_high) << 32)); + return WaitForAddress(system, static_cast<VAddr>(address), type, value, timeout); +} + // Signals to an address (via Address Arbiter) static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, s32 num_to_wake) { @@ -1707,6 +1863,11 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake); } +static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value, + s32 num_to_wake) { + return SignalToAddress(system, static_cast<VAddr>(address), type, value, num_to_wake); +} + static void KernelDebug([[maybe_unused]] Core::System& system, [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1, [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) { @@ -1725,14 +1886,21 @@ static u64 GetSystemTick(Core::System& system) { auto& core_timing = system.CoreTiming(); // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) - const u64 result{Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks())}; + const u64 result{system.CoreTiming().GetClockTicks()}; - // Advance time to defeat dumb games that busy-wait for the frame to end. - core_timing.AddTicks(400); + if (!system.Kernel().IsMulticore()) { + core_timing.AddTicks(400U); + } return result; } +static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) { + u64 time = GetSystemTick(system); + *time_low = static_cast<u32>(time); + *time_high = static_cast<u32>(time >> 32); +} + /// Close a handle static ResultCode CloseHandle(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle); @@ -1765,9 +1933,14 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) { return ERR_INVALID_HANDLE; } +static ResultCode ResetSignal32(Core::System& system, Handle handle) { + return ResetSignal(system, handle); +} + /// Creates a TransferMemory object static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size, u32 permissions) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size, permissions); @@ -1812,6 +1985,12 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd return RESULT_SUCCESS; } +static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size, + u32 permissions) { + return CreateTransferMemory(system, handle, static_cast<VAddr>(addr), + static_cast<std::size_t>(size), permissions); +} + static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core, u64* mask) { LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle); @@ -1821,6 +2000,8 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", thread_handle); + *core = 0; + *mask = 0; return ERR_INVALID_HANDLE; } @@ -1830,6 +2011,15 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, return RESULT_SUCCESS; } +static ResultCode GetThreadCoreMask32(Core::System& system, Handle thread_handle, u32* core, + u32* mask_low, u32* mask_high) { + u64 mask{}; + const auto result = GetThreadCoreMask(system, thread_handle, core, &mask); + *mask_high = static_cast<u32>(mask >> 32); + *mask_low = static_cast<u32>(mask); + return result; +} + static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core, u64 affinity_mask) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}", @@ -1861,7 +2051,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return ERR_INVALID_COMBINATION; } - if (core < Core::NUM_CPU_CORES) { + if (core < Core::Hardware::NUM_CPU_CORES) { if ((affinity_mask & (1ULL << core)) == 0) { LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}", core, @@ -1883,11 +2073,14 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return ERR_INVALID_HANDLE; } - system.PrepareReschedule(thread->GetProcessorID()); - thread->ChangeCore(core, affinity_mask); - system.PrepareReschedule(thread->GetProcessorID()); + return thread->SetCoreAndAffinityMask(core, affinity_mask); +} - return RESULT_SUCCESS; +static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core, + u32 affinity_mask_low, u32 affinity_mask_high) { + const u64 affinity_mask = + static_cast<u64>(affinity_mask_low) | (static_cast<u64>(affinity_mask_high) << 32); + return SetThreadCoreMask(system, thread_handle, core, affinity_mask); } static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) { @@ -1918,6 +2111,10 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle return RESULT_SUCCESS; } +static ResultCode CreateEvent32(Core::System& system, Handle* write_handle, Handle* read_handle) { + return CreateEvent(system, write_handle, read_handle); +} + static ResultCode ClearEvent(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle); @@ -1939,6 +2136,10 @@ static ResultCode ClearEvent(Core::System& system, Handle handle) { return ERR_INVALID_HANDLE; } +static ResultCode ClearEvent32(Core::System& system, Handle handle) { + return ClearEvent(system, handle); +} + static ResultCode SignalEvent(Core::System& system, Handle handle) { LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle); @@ -1951,10 +2152,13 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) { } writable_event->Signal(); - system.PrepareReschedule(); return RESULT_SUCCESS; } +static ResultCode SignalEvent32(Core::System& system, Handle handle) { + return SignalEvent(system, handle); +} + static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type); @@ -1982,6 +2186,7 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_ } static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) { + std::lock_guard lock{HLE::g_hle_lock}; LOG_DEBUG(Kernel_SVC, "called"); auto& kernel = system.Kernel(); @@ -2139,6 +2344,15 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd return RESULT_SUCCESS; } +static ResultCode FlushProcessDataCache32(Core::System& system, Handle handle, u32 address, + u32 size) { + // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a nope + // as all emulation is done in the same cache level in host architecture, thus data cache + // does not need flushing. + LOG_DEBUG(Kernel_SVC, "called"); + return RESULT_SUCCESS; +} + namespace { struct FunctionDef { using Func = void(Core::System&); @@ -2153,57 +2367,57 @@ static const FunctionDef SVC_Table_32[] = { {0x00, nullptr, "Unknown"}, {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"}, {0x02, nullptr, "Unknown"}, - {0x03, nullptr, "SetMemoryAttribute32"}, - {0x04, nullptr, "MapMemory32"}, - {0x05, nullptr, "UnmapMemory32"}, + {0x03, SvcWrap32<SetMemoryAttribute32>, "SetMemoryAttribute32"}, + {0x04, SvcWrap32<MapMemory32>, "MapMemory32"}, + {0x05, SvcWrap32<UnmapMemory32>, "UnmapMemory32"}, {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"}, - {0x07, nullptr, "ExitProcess32"}, - {0x08, nullptr, "CreateThread32"}, - {0x09, nullptr, "StartThread32"}, - {0x0a, nullptr, "ExitThread32"}, - {0x0b, nullptr, "SleepThread32"}, + {0x07, SvcWrap32<ExitProcess32>, "ExitProcess32"}, + {0x08, SvcWrap32<CreateThread32>, "CreateThread32"}, + {0x09, SvcWrap32<StartThread32>, "StartThread32"}, + {0x0a, SvcWrap32<ExitThread32>, "ExitThread32"}, + {0x0b, SvcWrap32<SleepThread32>, "SleepThread32"}, {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"}, - {0x0d, nullptr, "SetThreadPriority32"}, - {0x0e, nullptr, "GetThreadCoreMask32"}, - {0x0f, nullptr, "SetThreadCoreMask32"}, - {0x10, nullptr, "GetCurrentProcessorNumber32"}, - {0x11, nullptr, "SignalEvent32"}, - {0x12, nullptr, "ClearEvent32"}, - {0x13, nullptr, "MapSharedMemory32"}, + {0x0d, SvcWrap32<SetThreadPriority32>, "SetThreadPriority32"}, + {0x0e, SvcWrap32<GetThreadCoreMask32>, "GetThreadCoreMask32"}, + {0x0f, SvcWrap32<SetThreadCoreMask32>, "SetThreadCoreMask32"}, + {0x10, SvcWrap32<GetCurrentProcessorNumber32>, "GetCurrentProcessorNumber32"}, + {0x11, SvcWrap32<SignalEvent32>, "SignalEvent32"}, + {0x12, SvcWrap32<ClearEvent32>, "ClearEvent32"}, + {0x13, SvcWrap32<MapSharedMemory32>, "MapSharedMemory32"}, {0x14, nullptr, "UnmapSharedMemory32"}, - {0x15, nullptr, "CreateTransferMemory32"}, + {0x15, SvcWrap32<CreateTransferMemory32>, "CreateTransferMemory32"}, {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"}, - {0x17, nullptr, "ResetSignal32"}, + {0x17, SvcWrap32<ResetSignal32>, "ResetSignal32"}, {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"}, - {0x19, nullptr, "CancelSynchronization32"}, - {0x1a, nullptr, "ArbitrateLock32"}, - {0x1b, nullptr, "ArbitrateUnlock32"}, - {0x1c, nullptr, "WaitProcessWideKeyAtomic32"}, + {0x19, SvcWrap32<CancelSynchronization32>, "CancelSynchronization32"}, + {0x1a, SvcWrap32<ArbitrateLock32>, "ArbitrateLock32"}, + {0x1b, SvcWrap32<ArbitrateUnlock32>, "ArbitrateUnlock32"}, + {0x1c, SvcWrap32<WaitProcessWideKeyAtomic32>, "WaitProcessWideKeyAtomic32"}, {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"}, - {0x1e, nullptr, "GetSystemTick32"}, + {0x1e, SvcWrap32<GetSystemTick32>, "GetSystemTick32"}, {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"}, {0x20, nullptr, "Unknown"}, {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"}, {0x22, nullptr, "SendSyncRequestWithUserBuffer32"}, {0x23, nullptr, "Unknown"}, - {0x24, nullptr, "GetProcessId32"}, + {0x24, SvcWrap32<GetProcessId32>, "GetProcessId32"}, {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"}, - {0x26, nullptr, "Break32"}, + {0x26, SvcWrap32<Break32>, "Break32"}, {0x27, nullptr, "OutputDebugString32"}, {0x28, nullptr, "Unknown"}, {0x29, SvcWrap32<GetInfo32>, "GetInfo32"}, {0x2a, nullptr, "Unknown"}, {0x2b, nullptr, "Unknown"}, - {0x2c, nullptr, "MapPhysicalMemory32"}, - {0x2d, nullptr, "UnmapPhysicalMemory32"}, + {0x2c, SvcWrap32<MapPhysicalMemory32>, "MapPhysicalMemory32"}, + {0x2d, SvcWrap32<UnmapPhysicalMemory32>, "UnmapPhysicalMemory32"}, {0x2e, nullptr, "Unknown"}, {0x2f, nullptr, "Unknown"}, {0x30, nullptr, "Unknown"}, {0x31, nullptr, "Unknown"}, - {0x32, nullptr, "SetThreadActivity32"}, - {0x33, nullptr, "GetThreadContext32"}, - {0x34, nullptr, "WaitForAddress32"}, - {0x35, nullptr, "SignalToAddress32"}, + {0x32, SvcWrap32<SetThreadActivity32>, "SetThreadActivity32"}, + {0x33, SvcWrap32<GetThreadContext32>, "GetThreadContext32"}, + {0x34, SvcWrap32<WaitForAddress32>, "WaitForAddress32"}, + {0x35, SvcWrap32<SignalToAddress32>, "SignalToAddress32"}, {0x36, nullptr, "Unknown"}, {0x37, nullptr, "Unknown"}, {0x38, nullptr, "Unknown"}, @@ -2219,7 +2433,7 @@ static const FunctionDef SVC_Table_32[] = { {0x42, nullptr, "Unknown"}, {0x43, nullptr, "ReplyAndReceive32"}, {0x44, nullptr, "Unknown"}, - {0x45, nullptr, "CreateEvent32"}, + {0x45, SvcWrap32<CreateEvent32>, "CreateEvent32"}, {0x46, nullptr, "Unknown"}, {0x47, nullptr, "Unknown"}, {0x48, nullptr, "Unknown"}, @@ -2245,7 +2459,7 @@ static const FunctionDef SVC_Table_32[] = { {0x5c, nullptr, "Unknown"}, {0x5d, nullptr, "Unknown"}, {0x5e, nullptr, "Unknown"}, - {0x5F, nullptr, "FlushProcessDataCache32"}, + {0x5F, SvcWrap32<FlushProcessDataCache32>, "FlushProcessDataCache32"}, {0x60, nullptr, "Unknown"}, {0x61, nullptr, "Unknown"}, {0x62, nullptr, "Unknown"}, @@ -2423,13 +2637,10 @@ static const FunctionDef* GetSVCInfo64(u32 func_num) { return &SVC_Table_64[func_num]; } -MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); - void Call(Core::System& system, u32 immediate) { - MICROPROFILE_SCOPE(Kernel_SVC); - - // Lock the global kernel mutex when we enter the kernel HLE. - std::lock_guard lock{HLE::g_hle_lock}; + system.ExitDynarmicProfile(); + auto& kernel = system.Kernel(); + kernel.EnterSVCProfile(); const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate) : GetSVCInfo32(immediate); @@ -2442,6 +2653,9 @@ void Call(Core::System& system, u32 immediate) { } else { LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate); } + + kernel.ExitSVCProfile(); + system.EnterDynarmicProfile(); } } // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 7d735e3fa..0b6dd9df0 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -350,13 +350,50 @@ void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); } -// Used by QueryMemory32 +// Used by QueryMemory32, ArbitrateLock32 template <ResultCode func(Core::System&, u32, u32, u32)> void SvcWrap32(Core::System& system) { FuncReturn32(system, func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw); } +// Used by Break32 +template <void func(Core::System&, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)); +} + +// Used by ExitProcess32, ExitThread32 +template <void func(Core::System&)> +void SvcWrap32(Core::System& system) { + func(system); +} + +// Used by GetCurrentProcessorNumber32 +template <u32 func(Core::System&)> +void SvcWrap32(Core::System& system) { + FuncReturn32(system, func(system)); +} + +// Used by SleepThread32 +template <void func(Core::System&, u32, u32)> +void SvcWrap32(Core::System& system) { + func(system, Param32(system, 0), Param32(system, 1)); +} + +// Used by CreateThread32 +template <ResultCode func(Core::System&, Handle*, u32, u32, u32, u32, s32)> +void SvcWrap32(Core::System& system) { + Handle param_1 = 0; + + const u32 retval = func(system, ¶m_1, Param32(system, 0), Param32(system, 1), + Param32(system, 2), Param32(system, 3), Param32(system, 4)) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); +} + // Used by GetInfo32 template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)> void SvcWrap32(Core::System& system) { @@ -393,18 +430,114 @@ void SvcWrap32(Core::System& system) { FuncReturn(system, retval); } +// Used by GetSystemTick32 +template <void func(Core::System&, u32*, u32*)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + u32 param_2 = 0; + + func(system, ¶m_1, ¶m_2); + system.CurrentArmInterface().SetReg(0, param_1); + system.CurrentArmInterface().SetReg(1, param_2); +} + +// Used by CreateEvent32 +template <ResultCode func(Core::System&, Handle*, Handle*)> +void SvcWrap32(Core::System& system) { + Handle param_1 = 0; + Handle param_2 = 0; + + const u32 retval = func(system, ¶m_1, ¶m_2).raw; + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + FuncReturn(system, retval); +} + +// Used by GetThreadId32 +template <ResultCode func(Core::System&, Handle, u32*, u32*, u32*)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + u32 param_2 = 0; + u32 param_3 = 0; + + const u32 retval = func(system, Param32(system, 2), ¶m_1, ¶m_2, ¶m_3).raw; + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + system.CurrentArmInterface().SetReg(3, param_3); + FuncReturn(system, retval); +} + // Used by SignalProcessWideKey32 template <void func(Core::System&, u32, s32)> void SvcWrap32(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1))); } -// Used by SendSyncRequest32 +// Used by SetThreadPriority32 +template <ResultCode func(Core::System&, Handle, u32)> +void SvcWrap32(Core::System& system) { + const u32 retval = + func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw; + FuncReturn(system, retval); +} + +// Used by SetThreadCoreMask32 +template <ResultCode func(Core::System&, Handle, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + const u32 retval = + func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1)), + static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) + .raw; + FuncReturn(system, retval); +} + +// Used by WaitProcessWideKeyAtomic32 +template <ResultCode func(Core::System&, u32, u32, Handle, u32, u32)> +void SvcWrap32(Core::System& system) { + const u32 retval = + func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), + static_cast<Handle>(Param(system, 2)), static_cast<u32>(Param(system, 3)), + static_cast<u32>(Param(system, 4))) + .raw; + FuncReturn(system, retval); +} + +// Used by WaitForAddress32 +template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)> +void SvcWrap32(Core::System& system) { + const u32 retval = func(system, static_cast<u32>(Param(system, 0)), + static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), + static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4))) + .raw; + FuncReturn(system, retval); +} + +// Used by SignalToAddress32 +template <ResultCode func(Core::System&, u32, u32, s32, s32)> +void SvcWrap32(Core::System& system) { + const u32 retval = + func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) + .raw; + FuncReturn(system, retval); +} + +// Used by SendSyncRequest32, ArbitrateUnlock32 template <ResultCode func(Core::System&, u32)> void SvcWrap32(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); } +// Used by CreateTransferMemory32 +template <ResultCode func(Core::System&, Handle*, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + Handle handle = 0; + const u32 retval = + func(system, &handle, Param32(system, 1), Param32(system, 2), Param32(system, 3)).raw; + system.CurrentArmInterface().SetReg(1, handle); + FuncReturn(system, retval); +} + // Used by WaitSynchronization32 template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)> void SvcWrap32(Core::System& system) { diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp index dc37fad1a..851b702a5 100644 --- a/src/core/hle/kernel/synchronization.cpp +++ b/src/core/hle/kernel/synchronization.cpp @@ -10,78 +10,107 @@ #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" namespace Kernel { -/// Default thread wakeup callback for WaitSynchronization -static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<SynchronizationObject> object, - std::size_t index) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); - - if (reason == ThreadWakeupReason::Timeout) { - thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); - return true; - } - - ASSERT(reason == ThreadWakeupReason::Signal); - thread->SetWaitSynchronizationResult(RESULT_SUCCESS); - thread->SetWaitSynchronizationOutput(static_cast<u32>(index)); - return true; -} - Synchronization::Synchronization(Core::System& system) : system{system} {} void Synchronization::SignalObject(SynchronizationObject& obj) const { + auto& kernel = system.Kernel(); + SchedulerLock lock(kernel); + auto& time_manager = kernel.TimeManager(); if (obj.IsSignaled()) { - obj.WakeupAllWaitingThreads(); + for (auto thread : obj.GetWaitingThreads()) { + if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) { + if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) { + ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); + ASSERT(thread->IsWaitingSync()); + } + thread->SetSynchronizationResults(&obj, RESULT_SUCCESS); + thread->ResumeFromWait(); + } + } + obj.ClearWaitingThreads(); } } std::pair<ResultCode, Handle> Synchronization::WaitFor( std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { + auto& kernel = system.Kernel(); auto* const thread = system.CurrentScheduler().GetCurrentThread(); - // Find the first object that is acquirable in the provided list of objects - const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(), - [thread](const std::shared_ptr<SynchronizationObject>& object) { - return object->IsSignaled(); - }); - - if (itr != sync_objects.end()) { - // We found a ready object, acquire it and set the result value - SynchronizationObject* object = itr->get(); - object->Acquire(thread); - const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); - return {RESULT_SUCCESS, index}; + Handle event_handle = InvalidHandle; + { + SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds); + const auto itr = + std::find_if(sync_objects.begin(), sync_objects.end(), + [thread](const std::shared_ptr<SynchronizationObject>& object) { + return object->IsSignaled(); + }); + + if (itr != sync_objects.end()) { + // We found a ready object, acquire it and set the result value + SynchronizationObject* object = itr->get(); + object->Acquire(thread); + const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); + lock.CancelSleep(); + return {RESULT_SUCCESS, index}; + } + + if (nano_seconds == 0) { + lock.CancelSleep(); + return {RESULT_TIMEOUT, InvalidHandle}; + } + + if (thread->IsPendingTermination()) { + lock.CancelSleep(); + return {ERR_THREAD_TERMINATING, InvalidHandle}; + } + + if (thread->IsSyncCancelled()) { + thread->SetSyncCancelled(false); + lock.CancelSleep(); + return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; + } + + for (auto& object : sync_objects) { + object->AddWaitingThread(SharedFrom(thread)); + } + + thread->SetSynchronizationObjects(&sync_objects); + thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + thread->SetStatus(ThreadStatus::WaitSynch); + thread->SetWaitingSync(true); } + thread->SetWaitingSync(false); - // No objects were ready to be acquired, prepare to suspend the thread. - - // If a timeout value of 0 was provided, just return the Timeout error code instead of - // suspending the thread. - if (nano_seconds == 0) { - return {RESULT_TIMEOUT, InvalidHandle}; + if (event_handle != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); } - if (thread->IsSyncCancelled()) { - thread->SetSyncCancelled(false); - return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; + { + SchedulerLock lock(kernel); + ResultCode signaling_result = thread->GetSignalingResult(); + SynchronizationObject* signaling_object = thread->GetSignalingObject(); + thread->SetSynchronizationObjects(nullptr); + auto shared_thread = SharedFrom(thread); + for (auto& obj : sync_objects) { + obj->RemoveWaitingThread(shared_thread); + } + if (signaling_object != nullptr) { + const auto itr = std::find_if( + sync_objects.begin(), sync_objects.end(), + [signaling_object](const std::shared_ptr<SynchronizationObject>& object) { + return object.get() == signaling_object; + }); + ASSERT(itr != sync_objects.end()); + signaling_object->Acquire(thread); + const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); + return {signaling_result, index}; + } + return {signaling_result, -1}; } - - for (auto& object : sync_objects) { - object->AddWaitingThread(SharedFrom(thread)); - } - - thread->SetSynchronizationObjects(std::move(sync_objects)); - thread->SetStatus(ThreadStatus::WaitSynch); - - // Create an event to wake the thread up after the specified nanosecond delay has passed - thread->WakeAfterDelay(nano_seconds); - thread->SetWakeupCallback(DefaultThreadWakeupCallback); - - system.PrepareReschedule(thread->GetProcessorID()); - - return {RESULT_TIMEOUT, InvalidHandle}; } } // namespace Kernel diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp index 43f3eef18..ba4d39157 100644 --- a/src/core/hle/kernel/synchronization_object.cpp +++ b/src/core/hle/kernel/synchronization_object.cpp @@ -38,68 +38,8 @@ void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) waiting_threads.erase(itr); } -std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const { - Thread* candidate = nullptr; - u32 candidate_priority = THREADPRIO_LOWEST + 1; - - for (const auto& thread : waiting_threads) { - const ThreadStatus thread_status = thread->GetStatus(); - - // The list of waiting threads must not contain threads that are not waiting to be awakened. - ASSERT_MSG(thread_status == ThreadStatus::WaitSynch || - thread_status == ThreadStatus::WaitHLEEvent, - "Inconsistent thread statuses in waiting_threads"); - - if (thread->GetPriority() >= candidate_priority) - continue; - - if (ShouldWait(thread.get())) - continue; - - candidate = thread.get(); - candidate_priority = thread->GetPriority(); - } - - return SharedFrom(candidate); -} - -void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { - ASSERT(!ShouldWait(thread.get())); - - if (!thread) { - return; - } - - if (thread->IsSleepingOnWait()) { - for (const auto& object : thread->GetSynchronizationObjects()) { - ASSERT(!object->ShouldWait(thread.get())); - object->Acquire(thread.get()); - } - } else { - Acquire(thread.get()); - } - - const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this)); - - thread->ClearSynchronizationObjects(); - - thread->CancelWakeupTimer(); - - bool resume = true; - if (thread->HasWakeupCallback()) { - resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Signal, thread, SharedFrom(this), - index); - } - if (resume) { - thread->ResumeFromWait(); - kernel.PrepareReschedule(thread->GetProcessorID()); - } -} - -void SynchronizationObject::WakeupAllWaitingThreads() { - while (auto thread = GetHighestPriorityReadyThread()) { - WakeupWaitingThread(thread); - } +void SynchronizationObject::ClearWaitingThreads() { + waiting_threads.clear(); } const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const { diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h index 741c31faf..f89b24204 100644 --- a/src/core/hle/kernel/synchronization_object.h +++ b/src/core/hle/kernel/synchronization_object.h @@ -12,6 +12,7 @@ namespace Kernel { class KernelCore; +class Synchronization; class Thread; /// Class that represents a Kernel object that a thread can be waiting on @@ -49,24 +50,11 @@ public: */ void RemoveWaitingThread(std::shared_ptr<Thread> thread); - /** - * Wake up all threads waiting on this object that can be awoken, in priority order, - * and set the synchronization result and output of the thread. - */ - void WakeupAllWaitingThreads(); - - /** - * Wakes up a single thread waiting on this object. - * @param thread Thread that is waiting on this object to wakeup. - */ - void WakeupWaitingThread(std::shared_ptr<Thread> thread); - - /// Obtains the highest priority thread that is ready to run from this object's waiting list. - std::shared_ptr<Thread> GetHighestPriorityReadyThread() const; - /// Get a const reference to the waiting threads list for debug use const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const; + void ClearWaitingThreads(); + protected: bool is_signaled{}; // Tells if this sync object is signalled; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index db7f379ac..2b1092697 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -9,12 +9,21 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/fiber.h" #include "common/logging/log.h" #include "common/thread_queue_list.h" #include "core/arm/arm_interface.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" +#endif +#include "core/arm/cpu_interrupt_handler.h" +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/cpu_manager.h" #include "core/hardware_properties.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" @@ -23,6 +32,7 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" #include "core/hle/result.h" #include "core/memory.h" @@ -44,46 +54,26 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {} Thread::~Thread() = default; void Thread::Stop() { - // Cancel any outstanding wakeup events for this thread - Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - global_handle); - kernel.GlobalHandleTable().Close(global_handle); - global_handle = 0; - SetStatus(ThreadStatus::Dead); - Signal(); - - // Clean up any dangling references in objects that this thread was waiting for - for (auto& wait_object : wait_objects) { - wait_object->RemoveWaitingThread(SharedFrom(this)); - } - wait_objects.clear(); - - owner_process->UnregisterThread(this); - - // Mark the TLS slot in the thread's page as free. - owner_process->FreeTLSRegion(tls_address); -} - -void Thread::WakeAfterDelay(s64 nanoseconds) { - // Don't schedule a wakeup if the thread wants to wait forever - if (nanoseconds == -1) - return; + { + SchedulerLock lock(kernel); + SetStatus(ThreadStatus::Dead); + Signal(); + kernel.GlobalHandleTable().Close(global_handle); - // This function might be called from any thread so we have to be cautious and use the - // thread-safe version of ScheduleEvent. - const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - Core::System::GetInstance().CoreTiming().ScheduleEvent( - cycles, kernel.ThreadWakeupCallbackEventType(), global_handle); -} + if (owner_process) { + owner_process->UnregisterThread(this); -void Thread::CancelWakeupTimer() { - Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - global_handle); + // Mark the TLS slot in the thread's page as free. + owner_process->FreeTLSRegion(tls_address); + } + arm_interface.reset(); + has_exited = true; + } + global_handle = 0; } void Thread::ResumeFromWait() { - ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects"); - + SchedulerLock lock(kernel); switch (status) { case ThreadStatus::Paused: case ThreadStatus::WaitSynch: @@ -99,7 +89,7 @@ void Thread::ResumeFromWait() { case ThreadStatus::Ready: // The thread's wakeup callback must have already been cleared when the thread was first // awoken. - ASSERT(wakeup_callback == nullptr); + ASSERT(hle_callback == nullptr); // If the thread is waiting on multiple wait objects, it might be awoken more than once // before actually resuming. We can ignore subsequent wakeups if the thread status has // already been set to ThreadStatus::Ready. @@ -115,24 +105,31 @@ void Thread::ResumeFromWait() { return; } - wakeup_callback = nullptr; + SetStatus(ThreadStatus::Ready); +} + +void Thread::OnWakeUp() { + SchedulerLock lock(kernel); - if (activity == ThreadActivity::Paused) { - SetStatus(ThreadStatus::Paused); - return; - } + SetStatus(ThreadStatus::Ready); +} +ResultCode Thread::Start() { + SchedulerLock lock(kernel); SetStatus(ThreadStatus::Ready); + return RESULT_SUCCESS; } void Thread::CancelWait() { - if (GetSchedulingStatus() != ThreadSchedStatus::Paused) { + SchedulerLock lock(kernel); + if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) { is_sync_cancelled = true; return; } + // TODO(Blinkhawk): Implement cancel of server session is_sync_cancelled = false; - SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); - ResumeFromWait(); + SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED); + SetStatus(ThreadStatus::Ready); } static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, @@ -153,12 +150,29 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, context.fpcr = 0; } -ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name, - VAddr entry_point, u32 priority, u64 arg, - s32 processor_id, VAddr stack_top, - Process& owner_process) { +std::shared_ptr<Common::Fiber>& Thread::GetHostContext() { + return host_context; +} + +ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags, + std::string name, VAddr entry_point, u32 priority, + u64 arg, s32 processor_id, VAddr stack_top, + Process* owner_process) { + std::function<void(void*)> init_func = system.GetCpuManager().GetGuestThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + return Create(system, type_flags, name, entry_point, priority, arg, processor_id, stack_top, + owner_process, std::move(init_func), init_func_parameter); +} + +ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags, + std::string name, VAddr entry_point, u32 priority, + u64 arg, s32 processor_id, VAddr stack_top, + Process* owner_process, + std::function<void(void*)>&& thread_start_func, + void* thread_start_parameter) { + auto& kernel = system.Kernel(); // Check if priority is in ranged. Lowest priority -> highest priority id. - if (priority > THREADPRIO_LOWEST) { + if (priority > THREADPRIO_LOWEST && ((type_flags & THREADTYPE_IDLE) == 0)) { LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority); return ERR_INVALID_THREAD_PRIORITY; } @@ -168,11 +182,12 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin return ERR_INVALID_PROCESSOR_ID; } - auto& system = Core::System::GetInstance(); - if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) { - LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); - // TODO (bunnei): Find the correct error code to use here - return RESULT_UNKNOWN; + if (owner_process) { + if (!system.Memory().IsValidVirtualAddress(*owner_process, entry_point)) { + LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); + // TODO (bunnei): Find the correct error code to use here + return RESULT_UNKNOWN; + } } std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel); @@ -183,51 +198,82 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin thread->stack_top = stack_top; thread->tpidr_el0 = 0; thread->nominal_priority = thread->current_priority = priority; - thread->last_running_ticks = system.CoreTiming().GetTicks(); + thread->last_running_ticks = 0; thread->processor_id = processor_id; thread->ideal_core = processor_id; thread->affinity_mask = 1ULL << processor_id; - thread->wait_objects.clear(); + thread->wait_objects = nullptr; thread->mutex_wait_address = 0; thread->condvar_wait_address = 0; thread->wait_handle = 0; thread->name = std::move(name); thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap(); - thread->owner_process = &owner_process; - auto& scheduler = kernel.GlobalScheduler(); - scheduler.AddThread(thread); - thread->tls_address = thread->owner_process->CreateTLSRegion(); - - thread->owner_process->RegisterThread(thread.get()); + thread->owner_process = owner_process; + thread->type = type_flags; + if ((type_flags & THREADTYPE_IDLE) == 0) { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.AddThread(thread); + } + if (owner_process) { + thread->tls_address = thread->owner_process->CreateTLSRegion(); + thread->owner_process->RegisterThread(thread.get()); + } else { + thread->tls_address = 0; + } + // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used + // to initialize the context + thread->arm_interface.reset(); + if ((type_flags & THREADTYPE_HLE) == 0) { +#ifdef ARCHITECTURE_x86_64 + if (owner_process && !owner_process->Is64BitProcess()) { + thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>( + system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(), + processor_id); + } else { + thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>( + system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(), + processor_id); + } - ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), - static_cast<u32>(entry_point), static_cast<u32>(arg)); - ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); +#else + if (owner_process && !owner_process->Is64BitProcess()) { + thread->arm_interface = std::make_shared<Core::ARM_Unicorn>( + system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32, + processor_id); + } else { + thread->arm_interface = std::make_shared<Core::ARM_Unicorn>( + system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64, + processor_id); + } + LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); +#endif + ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), + static_cast<u32>(entry_point), static_cast<u32>(arg)); + ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); + } + thread->host_context = + std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter); return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); } void Thread::SetPriority(u32 priority) { + SchedulerLock lock(kernel); ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST, "Invalid priority value."); nominal_priority = priority; UpdatePriority(); } -void Thread::SetWaitSynchronizationResult(ResultCode result) { - context_32.cpu_registers[0] = result.raw; - context_64.cpu_registers[0] = result.raw; -} - -void Thread::SetWaitSynchronizationOutput(s32 output) { - context_32.cpu_registers[1] = output; - context_64.cpu_registers[1] = output; +void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) { + signaling_object = object; + signaling_result = result; } s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { - ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); - const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); - return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); + ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything"); + const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object); + return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1); } VAddr Thread::GetCommandBufferAddress() const { @@ -236,6 +282,14 @@ VAddr Thread::GetCommandBufferAddress() const { return GetTLSAddress() + command_header_offset; } +Core::ARM_Interface& Thread::ArmInterface() { + return *arm_interface; +} + +const Core::ARM_Interface& Thread::ArmInterface() const { + return *arm_interface; +} + void Thread::SetStatus(ThreadStatus new_status) { if (new_status == status) { return; @@ -257,10 +311,6 @@ void Thread::SetStatus(ThreadStatus new_status) { break; } - if (status == ThreadStatus::Running) { - last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); - } - status = new_status; } @@ -341,75 +391,116 @@ void Thread::UpdatePriority() { lock_owner->UpdatePriority(); } -void Thread::ChangeCore(u32 core, u64 mask) { - SetCoreAndAffinityMask(core, mask); -} - bool Thread::AllSynchronizationObjectsReady() const { - return std::none_of(wait_objects.begin(), wait_objects.end(), + return std::none_of(wait_objects->begin(), wait_objects->end(), [this](const std::shared_ptr<SynchronizationObject>& object) { return object->ShouldWait(this); }); } -bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<SynchronizationObject> object, - std::size_t index) { - ASSERT(wakeup_callback); - return wakeup_callback(reason, std::move(thread), std::move(object), index); +bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) { + ASSERT(hle_callback); + return hle_callback(std::move(thread)); } -void Thread::SetActivity(ThreadActivity value) { - activity = value; +ResultCode Thread::SetActivity(ThreadActivity value) { + SchedulerLock lock(kernel); + + auto sched_status = GetSchedulingStatus(); + + if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) { + return ERR_INVALID_STATE; + } + + if (IsPendingTermination()) { + return RESULT_SUCCESS; + } if (value == ThreadActivity::Paused) { - // Set status if not waiting - if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { - SetStatus(ThreadStatus::Paused); - kernel.PrepareReschedule(processor_id); + if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) != 0) { + return ERR_INVALID_STATE; + } + AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag); + } else { + if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) == 0) { + return ERR_INVALID_STATE; } - } else if (status == ThreadStatus::Paused) { - // Ready to reschedule - ResumeFromWait(); + RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag); } + return RESULT_SUCCESS; } -void Thread::Sleep(s64 nanoseconds) { - // Sleep current thread and check for next thread to schedule - SetStatus(ThreadStatus::WaitSleep); +ResultCode Thread::Sleep(s64 nanoseconds) { + Handle event_handle{}; + { + SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); + SetStatus(ThreadStatus::WaitSleep); + } - // Create an event to wake the thread up after the specified nanosecond delay has passed - WakeAfterDelay(nanoseconds); + if (event_handle != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); + } + return RESULT_SUCCESS; +} + +std::pair<ResultCode, bool> Thread::YieldSimple() { + bool is_redundant = false; + { + SchedulerLock lock(kernel); + is_redundant = kernel.GlobalScheduler().YieldThread(this); + } + return {RESULT_SUCCESS, is_redundant}; +} + +std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() { + bool is_redundant = false; + { + SchedulerLock lock(kernel); + is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this); + } + return {RESULT_SUCCESS, is_redundant}; } -bool Thread::YieldSimple() { - auto& scheduler = kernel.GlobalScheduler(); - return scheduler.YieldThread(this); +std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() { + bool is_redundant = false; + { + SchedulerLock lock(kernel); + is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this); + } + return {RESULT_SUCCESS, is_redundant}; } -bool Thread::YieldAndBalanceLoad() { - auto& scheduler = kernel.GlobalScheduler(); - return scheduler.YieldThreadAndBalanceLoad(this); +void Thread::AddSchedulingFlag(ThreadSchedFlags flag) { + const u32 old_state = scheduling_state; + pausing_state |= static_cast<u32>(flag); + const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); + scheduling_state = base_scheduling | pausing_state; + kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); } -bool Thread::YieldAndWaitForLoadBalancing() { - auto& scheduler = kernel.GlobalScheduler(); - return scheduler.YieldThreadAndWaitForLoadBalancing(this); +void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) { + const u32 old_state = scheduling_state; + pausing_state &= ~static_cast<u32>(flag); + const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); + scheduling_state = base_scheduling | pausing_state; + kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { - const u32 old_flags = scheduling_state; + const u32 old_state = scheduling_state; scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) | static_cast<u32>(new_status); - AdjustSchedulingOnStatus(old_flags); + kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); } void Thread::SetCurrentPriority(u32 new_priority) { const u32 old_priority = std::exchange(current_priority, new_priority); - AdjustSchedulingOnPriority(old_priority); + kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority); } ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { + SchedulerLock lock(kernel); const auto HighestSetCore = [](u64 mask, u32 max_cores) { for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) { if (((mask >> core) & 1) != 0) { @@ -443,111 +534,12 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { processor_id = ideal_core; } } - AdjustSchedulingOnAffinity(old_affinity_mask, old_core); + kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core); } } return RESULT_SUCCESS; } -void Thread::AdjustSchedulingOnStatus(u32 old_flags) { - if (old_flags == scheduling_state) { - return; - } - - auto& scheduler = kernel.GlobalScheduler(); - if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) == - ThreadSchedStatus::Runnable) { - // In this case the thread was running, now it's pausing/exitting - if (processor_id >= 0) { - scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Unsuggest(current_priority, core, this); - } - } - } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { - // The thread is now set to running from being stopped - if (processor_id >= 0) { - scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Suggest(current_priority, core, this); - } - } - } - - scheduler.SetReselectionPending(); -} - -void Thread::AdjustSchedulingOnPriority(u32 old_priority) { - if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { - return; - } - auto& scheduler = kernel.GlobalScheduler(); - if (processor_id >= 0) { - scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Unsuggest(old_priority, core, this); - } - } - - // Add thread to the new priority queues. - Thread* current_thread = GetCurrentThread(); - - if (processor_id >= 0) { - if (current_thread == this) { - scheduler.SchedulePrepend(current_priority, static_cast<u32>(processor_id), this); - } else { - scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this); - } - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Suggest(current_priority, core, this); - } - } - - scheduler.SetReselectionPending(); -} - -void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = kernel.GlobalScheduler(); - if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || - current_priority >= THREADPRIO_COUNT) { - return; - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (((old_affinity_mask >> core) & 1) != 0) { - if (core == static_cast<u32>(old_core)) { - scheduler.Unschedule(current_priority, core, this); - } else { - scheduler.Unsuggest(current_priority, core, this); - } - } - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (((affinity_mask >> core) & 1) != 0) { - if (core == static_cast<u32>(processor_id)) { - scheduler.Schedule(current_priority, core, this); - } else { - scheduler.Suggest(current_priority, core, this); - } - } - } - - scheduler.SetReselectionPending(); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// /** diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 23fdef8a4..c0342c462 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -6,26 +6,47 @@ #include <functional> #include <string> +#include <utility> #include <vector> #include "common/common_types.h" +#include "common/spin_lock.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" +namespace Common { +class Fiber; +} + +namespace Core { +class ARM_Interface; +class System; +} // namespace Core + namespace Kernel { +class GlobalScheduler; class KernelCore; class Process; class Scheduler; enum ThreadPriority : u32 { - THREADPRIO_HIGHEST = 0, ///< Highest thread priority - THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps - THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps - THREADPRIO_LOWEST = 63, ///< Lowest thread priority - THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities. + THREADPRIO_HIGHEST = 0, ///< Highest thread priority + THREADPRIO_MAX_CORE_MIGRATION = 2, ///< Highest priority for a core migration + THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps + THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps + THREADPRIO_LOWEST = 63, ///< Lowest thread priority + THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities. +}; + +enum ThreadType : u32 { + THREADTYPE_USER = 0x1, + THREADTYPE_KERNEL = 0x2, + THREADTYPE_HLE = 0x4, + THREADTYPE_IDLE = 0x8, + THREADTYPE_SUSPEND = 0x10, }; enum ThreadProcessorId : s32 { @@ -107,26 +128,45 @@ public: using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; - using WakeupCallback = - std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<SynchronizationObject> object, std::size_t index)>; + using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>; + + /** + * Creates and returns a new thread. The new thread is immediately scheduled + * @param system The instance of the whole system + * @param name The friendly name desired for the thread + * @param entry_point The address at which the thread should start execution + * @param priority The thread's priority + * @param arg User data to pass to the thread + * @param processor_id The ID(s) of the processors on which the thread is desired to be run + * @param stack_top The address of the thread's stack top + * @param owner_process The parent process for the thread, if null, it's a kernel thread + * @return A shared pointer to the newly created thread + */ + static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, + std::string name, VAddr entry_point, + u32 priority, u64 arg, s32 processor_id, + VAddr stack_top, Process* owner_process); /** * Creates and returns a new thread. The new thread is immediately scheduled - * @param kernel The kernel instance this thread will be created under. + * @param system The instance of the whole system * @param name The friendly name desired for the thread * @param entry_point The address at which the thread should start execution * @param priority The thread's priority * @param arg User data to pass to the thread * @param processor_id The ID(s) of the processors on which the thread is desired to be run * @param stack_top The address of the thread's stack top - * @param owner_process The parent process for the thread + * @param owner_process The parent process for the thread, if null, it's a kernel thread + * @param thread_start_func The function where the host context will start. + * @param thread_start_parameter The parameter which will passed to host context on init * @return A shared pointer to the newly created thread */ - static ResultVal<std::shared_ptr<Thread>> Create(KernelCore& kernel, std::string name, - VAddr entry_point, u32 priority, u64 arg, - s32 processor_id, VAddr stack_top, - Process& owner_process); + static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, + std::string name, VAddr entry_point, + u32 priority, u64 arg, s32 processor_id, + VAddr stack_top, Process* owner_process, + std::function<void(void*)>&& thread_start_func, + void* thread_start_parameter); std::string GetName() const override { return name; @@ -181,7 +221,7 @@ public: void UpdatePriority(); /// Changes the core that the thread is running or scheduled to run on. - void ChangeCore(u32 core, u64 mask); + ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); /** * Gets the thread's thread ID @@ -194,6 +234,10 @@ public: /// Resumes a thread from waiting void ResumeFromWait(); + void OnWakeUp(); + + ResultCode Start(); + /// Cancels a waiting operation that this thread may or may not be within. /// /// When the thread is within a waiting state, this will set the thread's @@ -202,26 +246,19 @@ public: /// void CancelWait(); - /** - * Schedules an event to wake up the specified thread after the specified delay - * @param nanoseconds The time this thread will be allowed to sleep for - */ - void WakeAfterDelay(s64 nanoseconds); + void SetSynchronizationResults(SynchronizationObject* object, ResultCode result); - /// Cancel any outstanding wakeup events for this thread - void CancelWakeupTimer(); + Core::ARM_Interface& ArmInterface(); - /** - * Sets the result after the thread awakens (from svcWaitSynchronization) - * @param result Value to set to the returned result - */ - void SetWaitSynchronizationResult(ResultCode result); + const Core::ARM_Interface& ArmInterface() const; - /** - * Sets the output parameter value after the thread awakens (from svcWaitSynchronization) - * @param output Value to set to the output parameter - */ - void SetWaitSynchronizationOutput(s32 output); + SynchronizationObject* GetSignalingObject() const { + return signaling_object; + } + + ResultCode GetSignalingResult() const { + return signaling_result; + } /** * Retrieves the index that this particular object occupies in the list of objects @@ -269,11 +306,6 @@ public: */ VAddr GetCommandBufferAddress() const; - /// Returns whether this thread is waiting on objects from a WaitSynchronization call. - bool IsSleepingOnWait() const { - return status == ThreadStatus::WaitSynch; - } - ThreadContext32& GetContext32() { return context_32; } @@ -290,6 +322,28 @@ public: return context_64; } + bool IsHLEThread() const { + return (type & THREADTYPE_HLE) != 0; + } + + bool IsSuspendThread() const { + return (type & THREADTYPE_SUSPEND) != 0; + } + + bool IsIdleThread() const { + return (type & THREADTYPE_IDLE) != 0; + } + + bool WasRunning() const { + return was_running; + } + + void SetWasRunning(bool value) { + was_running = value; + } + + std::shared_ptr<Common::Fiber>& GetHostContext(); + ThreadStatus GetStatus() const { return status; } @@ -325,18 +379,18 @@ public: } const ThreadSynchronizationObjects& GetSynchronizationObjects() const { - return wait_objects; + return *wait_objects; } - void SetSynchronizationObjects(ThreadSynchronizationObjects objects) { - wait_objects = std::move(objects); + void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) { + wait_objects = objects; } void ClearSynchronizationObjects() { - for (const auto& waiting_object : wait_objects) { + for (const auto& waiting_object : *wait_objects) { waiting_object->RemoveWaitingThread(SharedFrom(this)); } - wait_objects.clear(); + wait_objects->clear(); } /// Determines whether all the objects this thread is waiting on are ready. @@ -386,26 +440,35 @@ public: arb_wait_address = address; } - bool HasWakeupCallback() const { - return wakeup_callback != nullptr; + bool HasHLECallback() const { + return hle_callback != nullptr; } - void SetWakeupCallback(WakeupCallback callback) { - wakeup_callback = std::move(callback); + void SetHLECallback(HLECallback callback) { + hle_callback = std::move(callback); } - void InvalidateWakeupCallback() { - SetWakeupCallback(nullptr); + void SetHLETimeEvent(Handle time_event) { + hle_time_event = time_event; } - /** - * Invokes the thread's wakeup callback. - * - * @pre A valid wakeup callback has been set. Violating this precondition - * will cause an assertion to trigger. - */ - bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<SynchronizationObject> object, std::size_t index); + void SetHLESyncObject(SynchronizationObject* object) { + hle_object = object; + } + + Handle GetHLETimeEvent() const { + return hle_time_event; + } + + SynchronizationObject* GetHLESyncObject() const { + return hle_object; + } + + void InvalidateHLECallback() { + SetHLECallback(nullptr); + } + + bool InvokeHLECallback(std::shared_ptr<Thread> thread); u32 GetIdealCore() const { return ideal_core; @@ -415,23 +478,19 @@ public: return affinity_mask; } - ThreadActivity GetActivity() const { - return activity; - } - - void SetActivity(ThreadActivity value); + ResultCode SetActivity(ThreadActivity value); /// Sleeps this thread for the given amount of nanoseconds. - void Sleep(s64 nanoseconds); + ResultCode Sleep(s64 nanoseconds); /// Yields this thread without rebalancing loads. - bool YieldSimple(); + std::pair<ResultCode, bool> YieldSimple(); /// Yields this thread and does a load rebalancing. - bool YieldAndBalanceLoad(); + std::pair<ResultCode, bool> YieldAndBalanceLoad(); /// Yields this thread and if the core is left idle, loads are rebalanced - bool YieldAndWaitForLoadBalancing(); + std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing(); void IncrementYieldCount() { yield_count++; @@ -446,6 +505,10 @@ public: static_cast<u32>(ThreadSchedMasks::LowMask)); } + bool IsRunnable() const { + return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable); + } + bool IsRunning() const { return is_running; } @@ -466,17 +529,67 @@ public: return global_handle; } + bool IsWaitingForArbitration() const { + return waiting_for_arbitration; + } + + void WaitForArbitration(bool set) { + waiting_for_arbitration = set; + } + + bool IsWaitingSync() const { + return is_waiting_on_sync; + } + + void SetWaitingSync(bool is_waiting) { + is_waiting_on_sync = is_waiting; + } + + bool IsPendingTermination() const { + return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited; + } + + bool IsPaused() const { + return pausing_state != 0; + } + + bool IsContinuousOnSVC() const { + return is_continuous_on_svc; + } + + void SetContinuousOnSVC(bool is_continuous) { + is_continuous_on_svc = is_continuous; + } + + bool IsPhantomMode() const { + return is_phantom_mode; + } + + void SetPhantomMode(bool phantom) { + is_phantom_mode = phantom; + } + + bool HasExited() const { + return has_exited; + } + private: + friend class GlobalScheduler; + friend class Scheduler; + void SetSchedulingStatus(ThreadSchedStatus new_status); + void AddSchedulingFlag(ThreadSchedFlags flag); + void RemoveSchedulingFlag(ThreadSchedFlags flag); + void SetCurrentPriority(u32 new_priority); - ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); - void AdjustSchedulingOnStatus(u32 old_flags); - void AdjustSchedulingOnPriority(u32 old_priority); void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); + Common::SpinLock context_guard{}; ThreadContext32 context_32{}; ThreadContext64 context_64{}; + std::unique_ptr<Core::ARM_Interface> arm_interface{}; + std::shared_ptr<Common::Fiber> host_context{}; u64 thread_id = 0; @@ -485,6 +598,8 @@ private: VAddr entry_point = 0; VAddr stack_top = 0; + ThreadType type; + /// Nominal thread priority, as set by the emulated application. /// The nominal priority is the thread priority without priority /// inheritance taken into account. @@ -509,7 +624,10 @@ private: /// Objects that the thread is waiting on, in the same order as they were /// passed to WaitSynchronization. - ThreadSynchronizationObjects wait_objects; + ThreadSynchronizationObjects* wait_objects; + + SynchronizationObject* signaling_object; + ResultCode signaling_result{RESULT_SUCCESS}; /// List of threads that are waiting for a mutex that is held by this thread. MutexWaitingThreads wait_mutex_threads; @@ -526,30 +644,39 @@ private: /// If waiting for an AddressArbiter, this is the address being waited on. VAddr arb_wait_address{0}; + bool waiting_for_arbitration{}; /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. Handle global_handle = 0; - /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread - /// was waiting via WaitSynchronization then the object will be the last object that became - /// available. In case of a timeout, the object will be nullptr. - WakeupCallback wakeup_callback; + /// Callback for HLE Events + HLECallback hle_callback; + Handle hle_time_event; + SynchronizationObject* hle_object; Scheduler* scheduler = nullptr; u32 ideal_core{0xFFFFFFFF}; u64 affinity_mask{0x1}; - ThreadActivity activity = ThreadActivity::Normal; - s32 ideal_core_override = -1; u64 affinity_mask_override = 0x1; u32 affinity_override_count = 0; u32 scheduling_state = 0; + u32 pausing_state = 0; bool is_running = false; + bool is_waiting_on_sync = false; bool is_sync_cancelled = false; + bool is_continuous_on_svc = false; + + bool will_be_terminated = false; + bool is_phantom_mode = false; + bool has_exited = false; + + bool was_running = false; + std::string name; }; diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index 21b290468..941305e8e 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -8,30 +8,37 @@ #include "core/core_timing_util.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" namespace Kernel { -TimeManager::TimeManager(Core::System& system) : system{system} { +TimeManager::TimeManager(Core::System& system_) : system{system_} { time_manager_event_type = Core::Timing::CreateEvent( "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) { + SchedulerLock lock(system.Kernel()); Handle proper_handle = static_cast<Handle>(thread_handle); + if (cancelled_events[proper_handle]) { + return; + } std::shared_ptr<Thread> thread = this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); - thread->ResumeFromWait(); + thread->OnWakeUp(); }); } void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) { + event_handle = timetask->GetGlobalHandle(); if (nanoseconds > 0) { ASSERT(timetask); - event_handle = timetask->GetGlobalHandle(); - const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle); + ASSERT(timetask->GetStatus() != ThreadStatus::Ready); + ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex); + system.CoreTiming().ScheduleEvent(nanoseconds, time_manager_event_type, event_handle); } else { event_handle = InvalidHandle; } + cancelled_events[event_handle] = false; } void TimeManager::UnscheduleTimeEvent(Handle event_handle) { @@ -39,6 +46,12 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) { return; } system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle); + cancelled_events[event_handle] = true; +} + +void TimeManager::CancelTimeEvent(Thread* time_task) { + Handle event_handle = time_task->GetGlobalHandle(); + UnscheduleTimeEvent(event_handle); } } // namespace Kernel diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h index eaec486d1..307a18765 100644 --- a/src/core/hle/kernel/time_manager.h +++ b/src/core/hle/kernel/time_manager.h @@ -5,6 +5,7 @@ #pragma once #include <memory> +#include <unordered_map> #include "core/hle/kernel/object.h" @@ -35,9 +36,12 @@ public: /// Unschedule an existing time event void UnscheduleTimeEvent(Handle event_handle); + void CancelTimeEvent(Thread* time_task); + private: Core::System& system; std::shared_ptr<Core::Timing::EventType> time_manager_event_type; + std::unordered_map<Handle, bool> cancelled_events; }; } // namespace Kernel diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 630a8b048..8ac856ec3 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -44,6 +44,218 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) { return static_cast<u32>(std::min(size, max_jpeg_image_size)); } +class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> { +public: + explicit IManagerForSystemService(Common::UUID user_id) + : ServiceFramework("IManagerForSystemService") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "CheckAvailability"}, + {1, nullptr, "GetAccountId"}, + {2, nullptr, "EnsureIdTokenCacheAsync"}, + {3, nullptr, "LoadIdTokenCache"}, + {100, nullptr, "SetSystemProgramIdentification"}, + {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+ + {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+ + {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+ + {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0 + {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+ + {120, nullptr, "GetNintendoAccountId"}, + {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+ + {130, nullptr, "GetNintendoAccountUserResourceCache"}, + {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"}, + {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"}, + {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+ + {134, nullptr, "RefreshNintendoAccountVerificationUrlCache"}, // 9.0.0+ + {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+ + {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+ + {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+ + {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+ + {150, nullptr, "CreateAuthorizationRequest"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +// 3.0.0+ +class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> { +public: + explicit IFloatingRegistrationRequest(Common::UUID user_id) + : ServiceFramework("IFloatingRegistrationRequest") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetSessionId"}, + {12, nullptr, "GetAccountId"}, + {13, nullptr, "GetLinkedNintendoAccountId"}, + {14, nullptr, "GetNickname"}, + {15, nullptr, "GetProfileImage"}, + {21, nullptr, "LoadIdTokenCache"}, + {100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync + {101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync + {102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+ + {103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+ + {110, nullptr, "SetSystemProgramIdentification"}, + {111, nullptr, "EnsureIdTokenCacheAsync"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IAdministrator final : public ServiceFramework<IAdministrator> { +public: + explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "CheckAvailability"}, + {1, nullptr, "GetAccountId"}, + {2, nullptr, "EnsureIdTokenCacheAsync"}, + {3, nullptr, "LoadIdTokenCache"}, + {100, nullptr, "SetSystemProgramIdentification"}, + {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+ + {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+ + {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+ + {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0 + {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+ + {120, nullptr, "GetNintendoAccountId"}, + {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+ + {130, nullptr, "GetNintendoAccountUserResourceCache"}, + {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"}, + {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"}, + {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+ + {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+ + {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+ + {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+ + {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+ + {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+ + {150, nullptr, "CreateAuthorizationRequest"}, + {200, nullptr, "IsRegistered"}, + {201, nullptr, "RegisterAsync"}, + {202, nullptr, "UnregisterAsync"}, + {203, nullptr, "DeleteRegistrationInfoLocally"}, + {220, nullptr, "SynchronizeProfileAsync"}, + {221, nullptr, "UploadProfileAsync"}, + {222, nullptr, "SynchronizaProfileAsyncIfSecondsElapsed"}, + {250, nullptr, "IsLinkedWithNintendoAccount"}, + {251, nullptr, "CreateProcedureToLinkWithNintendoAccount"}, + {252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"}, + {255, nullptr, "CreateProcedureToUpdateLinkageStateOfNintendoAccount"}, + {256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"}, + {260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+ + {261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+ + {280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"}, + {290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+ + {300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+ + {400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+ + {401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+ + {900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+ + {901, nullptr, "ImportAsyncForWin"}, // 9.0.0+ + {997, nullptr, "DebugUnlinkNintendoAccountAsync"}, + {998, nullptr, "DebugSetAvailabilityErrorDetail"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> { +public: + explicit IAuthorizationRequest(Common::UUID user_id) + : ServiceFramework("IAuthorizationRequest") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetSessionId"}, + {10, nullptr, "InvokeWithoutInteractionAsync"}, + {19, nullptr, "IsAuthorized"}, + {20, nullptr, "GetAuthorizationCode"}, + {21, nullptr, "GetIdToken"}, + {22, nullptr, "GetState"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> { +public: + explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "PrepareAsync"}, + {1, nullptr, "GetRequest"}, + {2, nullptr, "ApplyResponse"}, + {3, nullptr, "ApplyResponseAsync"}, + {10, nullptr, "Suspend"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +// 3.0.0+ +class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> { +public: + explicit IOAuthProcedureForExternalNsa(Common::UUID user_id) + : ServiceFramework("IOAuthProcedureForExternalNsa") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "PrepareAsync"}, + {1, nullptr, "GetRequest"}, + {2, nullptr, "ApplyResponse"}, + {3, nullptr, "ApplyResponseAsync"}, + {10, nullptr, "Suspend"}, + {100, nullptr, "GetAccountId"}, + {101, nullptr, "GetLinkedNintendoAccountId"}, + {102, nullptr, "GetNickname"}, + {103, nullptr, "GetProfileImage"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IOAuthProcedureForNintendoAccountLinkage final + : public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> { +public: + explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id) + : ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "PrepareAsync"}, + {1, nullptr, "GetRequest"}, + {2, nullptr, "ApplyResponse"}, + {3, nullptr, "ApplyResponseAsync"}, + {10, nullptr, "Suspend"}, + {100, nullptr, "GetRequestWithTheme"}, + {101, nullptr, "IsNetworkServiceAccountReplaced"}, + {199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0 + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class INotifier final : public ServiceFramework<INotifier> { +public: + explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetSystemEvent"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + class IProfileCommon : public ServiceFramework<IProfileCommon> { public: explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, @@ -226,6 +438,54 @@ public: : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {} }; +class IAsyncContext final : public ServiceFramework<IAsyncContext> { +public: + explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetSystemEvent"}, + {1, nullptr, "Cancel"}, + {2, nullptr, "HasDone"}, + {3, nullptr, "GetResult"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class ISessionObject final : public ServiceFramework<ISessionObject> { +public: + explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") { + // clang-format off + static const FunctionInfo functions[] = { + {999, nullptr, "Dummy"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> { +public: + explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetSessionId"}, + {11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew) + {12, nullptr, "GetAccountId"}, + {13, nullptr, "GetLinkedNintendoAccountId"}, + {14, nullptr, "GetNickname"}, + {15, nullptr, "GetProfileImage"}, + {21, nullptr, "LoadIdTokenCache"}, // 3.0.0+ + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { public: explicit IManagerForApplication(Common::UUID user_id) @@ -265,6 +525,87 @@ private: Common::UUID user_id; }; +// 6.0.0+ +class IAsyncNetworkServiceLicenseKindContext final + : public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> { +public: + explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID user_id) + : ServiceFramework("IAsyncNetworkServiceLicenseKindContext") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetSystemEvent"}, + {1, nullptr, "Cancel"}, + {2, nullptr, "HasDone"}, + {3, nullptr, "GetResult"}, + {4, nullptr, "GetNetworkServiceLicenseKind"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +// 8.0.0+ +class IOAuthProcedureForUserRegistration final + : public ServiceFramework<IOAuthProcedureForUserRegistration> { +public: + explicit IOAuthProcedureForUserRegistration(Common::UUID user_id) + : ServiceFramework("IOAuthProcedureForUserRegistration") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "PrepareAsync"}, + {1, nullptr, "GetRequest"}, + {2, nullptr, "ApplyResponse"}, + {3, nullptr, "ApplyResponseAsync"}, + {10, nullptr, "Suspend"}, + {100, nullptr, "GetAccountId"}, + {101, nullptr, "GetLinkedNintendoAccountId"}, + {102, nullptr, "GetNickname"}, + {103, nullptr, "GetProfileImage"}, + {110, nullptr, "RegisterUserAsync"}, + {111, nullptr, "GetUid"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class DAUTH_O final : public ServiceFramework<DAUTH_O> { +public: + explicit DAUTH_O(Common::UUID) : ServiceFramework("dauth:o") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData + {1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+ + {2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+ + {10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+ + {11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+ + {12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+ + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +// 6.0.0+ +class IAsyncResult final : public ServiceFramework<IAsyncResult> { +public: + explicit IAsyncResult(Common::UUID user_id) : ServiceFramework("IAsyncResult") { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetResult"}, + {1, nullptr, "Cancel"}, + {2, nullptr, "IsAvailable"}, + {3, nullptr, "GetSystemEvent"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_ACC, "called"); IPC::ResponseBuilder rb{ctx, 3}; @@ -435,6 +776,15 @@ void Module::Interface::ListQualifiedUsers(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Module::Interface::ListOpenContextStoredUsers(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + // TODO(ogniK): Handle open contexts + ctx.WriteBuffer(profile_manager->GetOpenUsers()); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_ACC, "called"); // A u8 is passed into this function which we can safely ignore. It's to determine if we have diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index 74ca39d6e..d4c6395c6 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h @@ -34,6 +34,7 @@ public: void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); void GetProfileEditor(Kernel::HLERequestContext& ctx); void ListQualifiedUsers(Kernel::HLERequestContext& ctx); + void ListOpenContextStoredUsers(Kernel::HLERequestContext& ctx); private: ResultCode InitializeApplicationInfoBase(); diff --git a/src/core/hle/service/acc/acc_aa.cpp b/src/core/hle/service/acc/acc_aa.cpp index 3bac6bcd1..51f119b12 100644 --- a/src/core/hle/service/acc/acc_aa.cpp +++ b/src/core/hle/service/acc/acc_aa.cpp @@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {0, nullptr, "EnsureCacheAsync"}, {1, nullptr, "LoadCache"}, {2, nullptr, "GetDeviceAccountId"}, - {50, nullptr, "RegisterNotificationTokenAsync"}, - {51, nullptr, "UnregisterNotificationTokenAsync"}, + {50, nullptr, "RegisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0 + {51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0 }; RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 2eefc6df5..d2bb8c2c8 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"}, {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"}, {5, &ACC_SU::GetProfile, "GetProfile"}, - {6, nullptr, "GetProfileDigest"}, + {6, nullptr, "GetProfileDigest"}, // 3.0.0+ {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, - {60, nullptr, "ListOpenContextStoredUsers"}, - {99, nullptr, "DebugActivateOpenContextRetention"}, + {60, &ACC_SU::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0 + {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+ {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, {102, nullptr, "GetBaasAccountManagerForSystemService"}, {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, {104, nullptr, "GetProfileUpdateNotifier"}, - {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, - {106, nullptr, "GetProfileSyncNotifier"}, + {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+ + {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+ {110, nullptr, "StoreSaveDataThumbnail"}, {111, nullptr, "ClearSaveDataThumbnail"}, {112, nullptr, "LoadSaveDataThumbnail"}, - {113, nullptr, "GetSaveDataThumbnailExistence"}, - {120, nullptr, "ListOpenUsersInApplication"}, - {130, nullptr, "ActivateOpenContextRetention"}, - {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, - {150, nullptr, "AuthenticateApplicationAsync"}, - {190, nullptr, "GetUserLastOpenedApplication"}, - {191, nullptr, "ActivateOpenContextHolder"}, + {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+ + {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+ + {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+ + {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+ + {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+ + {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0 + {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+ {200, nullptr, "BeginUserRegistration"}, {201, nullptr, "CompleteUserRegistration"}, {202, nullptr, "CancelUserRegistration"}, @@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {204, nullptr, "SetUserPosition"}, {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"}, {206, nullptr, "CompleteUserRegistrationForcibly"}, - {210, nullptr, "CreateFloatingRegistrationRequest"}, - {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, - {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, + {210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+ + {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+ + {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+ {230, nullptr, "AuthenticateServiceAsync"}, {250, nullptr, "GetBaasAccountAdministrator"}, {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"}, - {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, + {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+ {299, nullptr, "SuspendBackgroundDaemon"}, - {997, nullptr, "DebugInvalidateTokenCacheForUser"}, + {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+ {998, nullptr, "DebugSetUserStateClose"}, {999, nullptr, "DebugSetUserStateOpen"}, }; diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp index fb4e7e772..cb44e06b7 100644 --- a/src/core/hle/service/acc/acc_u0.cpp +++ b/src/core/hle/service/acc/acc_u0.cpp @@ -17,23 +17,23 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"}, {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"}, {5, &ACC_U0::GetProfile, "GetProfile"}, - {6, nullptr, "GetProfileDigest"}, + {6, nullptr, "GetProfileDigest"}, // 3.0.0+ {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, - {60, nullptr, "ListOpenContextStoredUsers"}, - {99, nullptr, "DebugActivateOpenContextRetention"}, + {60, &ACC_U0::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0 + {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+ {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"}, {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, {102, nullptr, "AuthenticateApplicationAsync"}, - {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, + {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+ {110, nullptr, "StoreSaveDataThumbnail"}, {111, nullptr, "ClearSaveDataThumbnail"}, {120, nullptr, "CreateGuestLoginRequest"}, - {130, nullptr, "LoadOpenContext"}, - {131, nullptr, "ListOpenContextStoredUsers"}, - {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, - {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, - {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, + {130, nullptr, "LoadOpenContext"}, // 5.0.0+ + {131, &ACC_U0::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 6.0.0+ + {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+ + {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+ + {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+ }; // clang-format on diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp index 9f29cdc82..a4aa5316a 100644 --- a/src/core/hle/service/acc/acc_u1.cpp +++ b/src/core/hle/service/acc/acc_u1.cpp @@ -17,28 +17,29 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"}, {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"}, {5, &ACC_U1::GetProfile, "GetProfile"}, - {6, nullptr, "GetProfileDigest"}, + {6, nullptr, "GetProfileDigest"}, // 3.0.0+ {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, - {60, nullptr, "ListOpenContextStoredUsers"}, - {99, nullptr, "DebugActivateOpenContextRetention"}, + {60, &ACC_U1::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0 + {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+ {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, {102, nullptr, "GetBaasAccountManagerForSystemService"}, - {103, nullptr, "GetProfileUpdateNotifier"}, - {104, nullptr, "CheckNetworkServiceAvailabilityAsync"}, - {105, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, - {106, nullptr, "GetProfileSyncNotifier"}, + {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, + {104, nullptr, "GetProfileUpdateNotifier"}, + {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+ + {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+ {110, nullptr, "StoreSaveDataThumbnail"}, {111, nullptr, "ClearSaveDataThumbnail"}, {112, nullptr, "LoadSaveDataThumbnail"}, - {113, nullptr, "GetSaveDataThumbnailExistence"}, - {130, nullptr, "ActivateOpenContextRetention"}, - {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, - {150, nullptr, "AuthenticateApplicationAsync"}, - {190, nullptr, "GetUserLastOpenedApplication"}, - {191, nullptr, "ActivateOpenContextHolder"}, - {997, nullptr, "DebugInvalidateTokenCacheForUser"}, + {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+ + {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+ + {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+ + {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+ + {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+ + {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0 + {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+ + {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+ {998, nullptr, "DebugSetUserStateClose"}, {999, nullptr, "DebugSetUserStateOpen"}, }; diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 4df74c4f9..256449aa7 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -10,6 +10,7 @@ #include "core/core.h" #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" +#include "core/file_sys/registered_cache.h" #include "core/file_sys/savedata_factory.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/kernel.h" @@ -68,6 +69,7 @@ IWindowController::IWindowController(Core::System& system_) static const FunctionInfo functions[] = { {0, nullptr, "CreateWindow"}, {1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"}, + {2, nullptr, "GetAppletResourceUserIdOfCallerApplet"}, {10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"}, {11, nullptr, "ReleaseForegroundRights"}, {12, nullptr, "RejectToChangeIntoBackground"}, @@ -189,8 +191,8 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController" {5, nullptr, "GetLastForegroundCaptureImageEx"}, {6, nullptr, "GetLastApplicationCaptureImageEx"}, {7, nullptr, "GetCallerAppletCaptureImageEx"}, - {8, nullptr, "TakeScreenShotOfOwnLayer"}, // 2.0.0+ - {9, nullptr, "CopyBetweenCaptureBuffers"}, // 5.0.0+ + {8, nullptr, "TakeScreenShotOfOwnLayer"}, + {9, nullptr, "CopyBetweenCaptureBuffers"}, {10, nullptr, "AcquireLastApplicationCaptureBuffer"}, {11, nullptr, "ReleaseLastApplicationCaptureBuffer"}, {12, nullptr, "AcquireLastForegroundCaptureBuffer"}, @@ -200,17 +202,14 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController" {16, nullptr, "AcquireLastApplicationCaptureBufferEx"}, {17, nullptr, "AcquireLastForegroundCaptureBufferEx"}, {18, nullptr, "AcquireCallerAppletCaptureBufferEx"}, - // 2.0.0+ {20, nullptr, "ClearCaptureBuffer"}, {21, nullptr, "ClearAppletTransitionBuffer"}, - // 4.0.0+ {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"}, {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"}, {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"}, {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"}, {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"}, {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"}, - // 6.0.0+ {28, nullptr, "TakeScreenShotOfOwnLayerEx"}, }; // clang-format on @@ -225,7 +224,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} { static const FunctionInfo functions[] = { {0, nullptr, "NotifyMessageToHomeMenuForDebug"}, {1, nullptr, "OpenMainApplication"}, - {10, nullptr, "EmulateButtonEvent"}, + {10, nullptr, "PerformSystemButtonPressing"}, {20, nullptr, "InvalidateTransitionLayer"}, {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"}, {40, nullptr, "GetAppletResourceUsageInfo"}, @@ -267,13 +266,13 @@ ISelfController::ISelfController(Core::System& system, {16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"}, {17, nullptr, "SetControllerFirmwareUpdateSection"}, {18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"}, - {19, &ISelfController::SetScreenShotImageOrientation, "SetScreenShotImageOrientation"}, + {19, &ISelfController::SetAlbumImageOrientation, "SetAlbumImageOrientation"}, {20, nullptr, "SetDesirableKeyboardLayout"}, {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"}, {41, nullptr, "IsSystemBufferSharingEnabled"}, {42, nullptr, "GetSystemSharedLayerHandle"}, {43, nullptr, "GetSystemSharedBufferHandle"}, - {44, nullptr, "CreateManagedDisplaySeparableLayer"}, + {44, &ISelfController::CreateManagedDisplaySeparableLayer, "CreateManagedDisplaySeparableLayer"}, {45, nullptr, "SetManagedDisplayLayerSeparationMode"}, {50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"}, {51, nullptr, "ApproveToDisplay"}, @@ -443,7 +442,7 @@ void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& rb.Push(RESULT_SUCCESS); } -void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) { +void ISelfController::SetAlbumImageOrientation(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_AM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 2}; @@ -463,6 +462,24 @@ void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) rb.Push(*layer_id); } +void ISelfController::CreateManagedDisplaySeparableLayer(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + // TODO(Subv): Find out how AM determines the display to use, for now just + // create the layer in the Default display. + // This calls nn::vi::CreateRecordingLayer() which creates another layer. + // Currently we do not support more than 1 layer per display, output 1 layer id for now. + // Outputting 1 layer id instead of the expected 2 has not been observed to cause any adverse + // side effects. + // TODO: Support multiple layers + const auto display_id = nvflinger->OpenDisplay("Default"); + const auto layer_id = nvflinger->CreateLayer(*display_id); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.Push(*layer_id); +} + void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_AM, "(STUBBED) called"); @@ -607,6 +624,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system, {20, nullptr, "PushToGeneralChannel"}, {30, nullptr, "GetHomeButtonReaderLockAccessor"}, {31, nullptr, "GetReaderLockAccessorEx"}, + {32, nullptr, "GetWriterLockAccessorEx"}, {40, nullptr, "GetCradleFwVersion"}, {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"}, {51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"}, @@ -731,14 +749,14 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& if (Settings::values.use_docked_mode) { rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); } else { rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); } } @@ -842,7 +860,7 @@ public: {110, nullptr, "NeedsToExitProcess"}, {120, nullptr, "GetLibraryAppletInfo"}, {150, nullptr, "RequestForAppletToGetForeground"}, - {160, nullptr, "GetIndirectLayerConsumerHandle"}, + {160, &ILibraryAppletAccessor::GetIndirectLayerConsumerHandle, "GetIndirectLayerConsumerHandle"}, }; // clang-format on @@ -961,6 +979,18 @@ private: rb.PushCopyObjects(applet->GetBroker().GetInteractiveDataEvent()); } + void GetIndirectLayerConsumerHandle(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + // We require a non-zero handle to be valid. Using 0xdeadbeef allows us to trace if this is + // actually used anywhere + constexpr u64 handle = 0xdeadbeef; + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.Push(handle); + } + std::shared_ptr<Applets::Applet> applet; }; @@ -1132,6 +1162,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_) {24, nullptr, "GetLaunchStorageInfoForDebug"}, {25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"}, {26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"}, + {27, nullptr, "CreateCacheStorage"}, {30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"}, {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"}, {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"}, @@ -1157,6 +1188,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_) {120, nullptr, "ExecuteProgram"}, {121, nullptr, "ClearUserChannel"}, {122, nullptr, "UnpopToUserChannel"}, + {123, nullptr, "GetPreviousProgramIndex"}, + {124, nullptr, "EnableApplicationAllThreadDumpOnCrash"}, {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"}, {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"}, {141, nullptr, "TryPopFromFriendInvitationStorageChannel"}, @@ -1339,14 +1372,25 @@ void IApplicationFunctions::GetDisplayVersion(Kernel::HLERequestContext& ctx) { std::array<u8, 0x10> version_string{}; - FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID()}; - const auto res = pm.GetControlMetadata(); + const auto res = [this] { + const auto title_id = system.CurrentProcess()->GetTitleID(); + + FileSys::PatchManager pm{title_id}; + auto res = pm.GetControlMetadata(); + if (res.first != nullptr) { + return res; + } + + FileSys::PatchManager pm_update{FileSys::GetUpdateTitleID(title_id)}; + return pm_update.GetControlMetadata(); + }(); + if (res.first != nullptr) { const auto& version = res.first->GetVersionString(); std::copy(version.begin(), version.end(), version_string.begin()); } else { - constexpr u128 default_version = {1, 0}; - std::memcpy(version_string.data(), default_version.data(), sizeof(u128)); + constexpr char default_version[]{"1.0.0"}; + std::memcpy(version_string.data(), default_version, sizeof(default_version)); } IPC::ResponseBuilder rb{ctx, 6}; diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 469f7f814..6cfb11b48 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -138,8 +138,9 @@ private: void SetFocusHandlingMode(Kernel::HLERequestContext& ctx); void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx); void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx); - void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx); + void SetAlbumImageOrientation(Kernel::HLERequestContext& ctx); void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx); + void CreateManagedDisplaySeparableLayer(Kernel::HLERequestContext& ctx); void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index 54e63c138..fbe3686ae 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters( config.sub_text.size()); params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(), config.guide_text.size()); - params.initial_text = initial_text; + params.initial_text = std::move(initial_text); params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit; params.password = static_cast<bool>(config.is_password); params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position); @@ -60,7 +60,7 @@ void SoftwareKeyboard::Initialize() { std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig)); const auto work_buffer_storage = broker.PopNormalDataToApplet(); - ASSERT(work_buffer_storage != nullptr); + ASSERT_OR_EXECUTE(work_buffer_storage != nullptr, { return; }); const auto& work_buffer = work_buffer_storage->GetData(); if (config.initial_string_size == 0) @@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() { const auto parameters = ConvertToFrontendParameters(config, initial_text); - frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); }, + frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); }, parameters); } diff --git a/src/core/hle/service/am/spsm.cpp b/src/core/hle/service/am/spsm.cpp index 003ee8667..f27729ce7 100644 --- a/src/core/hle/service/am/spsm.cpp +++ b/src/core/hle/service/am/spsm.cpp @@ -10,17 +10,17 @@ SPSM::SPSM() : ServiceFramework{"spsm"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetState"}, - {1, nullptr, "SleepSystemAndWaitAwake"}, - {2, nullptr, "Unknown1"}, - {3, nullptr, "Unknown2"}, + {1, nullptr, "EnterSleep"}, + {2, nullptr, "GetLastWakeReason"}, + {3, nullptr, "Shutdown"}, {4, nullptr, "GetNotificationMessageEventHandle"}, - {5, nullptr, "Unknown3"}, - {6, nullptr, "Unknown4"}, - {7, nullptr, "Unknown5"}, + {5, nullptr, "ReceiveNotificationMessage"}, + {6, nullptr, "AnalyzeLogForLastSleepWakeSequence"}, + {7, nullptr, "ResetEventLog"}, {8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"}, {9, nullptr, "ChangeHomeButtonLongPressingTime"}, - {10, nullptr, "Unknown6"}, - {11, nullptr, "Unknown7"}, + {10, nullptr, "PutErrorState"}, + {11, nullptr, "InvalidateCurrentHomeButtonPressing"}, }; // clang-format on diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index 4227a4adf..8e79f707b 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -60,6 +60,7 @@ AOC_U::AOC_U(Core::System& system) {6, nullptr, "PrepareAddOnContentByApplicationId"}, {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, + {9, nullptr, "GetAddOnContentLostErrorCode"}, {100, nullptr, "CreateEcPurchasedEventManager"}, {101, nullptr, "CreatePermanentEcPurchasedEventManager"}, }; diff --git a/src/core/hle/service/bcat/bcat.cpp b/src/core/hle/service/bcat/bcat.cpp index 8bb2528c9..b31766212 100644 --- a/src/core/hle/service/bcat/bcat.cpp +++ b/src/core/hle/service/bcat/bcat.cpp @@ -14,6 +14,8 @@ BCAT::BCAT(Core::System& system, std::shared_ptr<Module> module, {0, &BCAT::CreateBcatService, "CreateBcatService"}, {1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"}, {2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"}, + {3, nullptr, "CreateDeliveryCacheProgressService"}, + {4, nullptr, "CreateDeliveryCacheProgressServiceWithApplicationId"}, }; // clang-format on RegisterHandlers(functions); diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp index 34aba7a27..603b64d4f 100644 --- a/src/core/hle/service/bcat/module.cpp +++ b/src/core/hle/service/bcat/module.cpp @@ -143,10 +143,13 @@ public: {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"}, {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"}, {30100, &IBcatService::SetPassphrase, "SetPassphrase"}, + {30101, nullptr, "Unknown"}, + {30102, nullptr, "Unknown2"}, {30200, nullptr, "RegisterBackgroundDeliveryTask"}, {30201, nullptr, "UnregisterBackgroundDeliveryTask"}, {30202, nullptr, "BlockDeliveryTask"}, {30203, nullptr, "UnblockDeliveryTask"}, + {30210, nullptr, "SetDeliveryTaskTimer"}, {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"}, {90100, nullptr, "EnumerateBackgroundDeliveryTask"}, {90200, nullptr, "GetDeliveryList"}, diff --git a/src/core/hle/service/bpc/bpc.cpp b/src/core/hle/service/bpc/bpc.cpp index 1c1ecdb60..fac6b2f9c 100644 --- a/src/core/hle/service/bpc/bpc.cpp +++ b/src/core/hle/service/bpc/bpc.cpp @@ -23,9 +23,14 @@ public: {5, nullptr, "GetBoardPowerControlEvent"}, {6, nullptr, "GetSleepButtonState"}, {7, nullptr, "GetPowerEvent"}, - {8, nullptr, "Unknown1"}, - {9, nullptr, "Unknown2"}, - {10, nullptr, "Unknown3"}, + {8, nullptr, "CreateWakeupTimer"}, + {9, nullptr, "CancelWakeupTimer"}, + {10, nullptr, "EnableWakeupTimerOnDevice"}, + {11, nullptr, "CreateWakeupTimerEx"}, + {12, nullptr, "GetLastEnabledWakeupTimerType"}, + {13, nullptr, "CleanAllWakeupTimers"}, + {14, nullptr, "Unknown"}, + {15, nullptr, "Unknown2"}, }; // clang-format on @@ -38,10 +43,11 @@ public: explicit BPC_R() : ServiceFramework{"bpc:r"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "GetExternalRtcValue"}, - {1, nullptr, "SetExternalRtcValue"}, - {2, nullptr, "ReadExternalRtcResetFlag"}, - {3, nullptr, "ClearExternalRtcResetFlag"}, + {0, nullptr, "GetRtcTime"}, + {1, nullptr, "SetRtcTime"}, + {2, nullptr, "GetRtcResetDetected"}, + {3, nullptr, "ClearRtcResetDetected"}, + {4, nullptr, "SetUpRtcResetOnShutdown"}, }; // clang-format on diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp index 40a06c9fd..f311afa2f 100644 --- a/src/core/hle/service/btdrv/btdrv.cpp +++ b/src/core/hle/service/btdrv/btdrv.cpp @@ -58,102 +58,103 @@ public: {1, nullptr, "InitializeBluetooth"}, {2, nullptr, "EnableBluetooth"}, {3, nullptr, "DisableBluetooth"}, - {4, nullptr, "CleanupBluetooth"}, + {4, nullptr, "FinalizeBluetooth"}, {5, nullptr, "GetAdapterProperties"}, {6, nullptr, "GetAdapterProperty"}, {7, nullptr, "SetAdapterProperty"}, - {8, nullptr, "StartDiscovery"}, - {9, nullptr, "CancelDiscovery"}, + {8, nullptr, "StartInquiry"}, + {9, nullptr, "StopInquiry"}, {10, nullptr, "CreateBond"}, {11, nullptr, "RemoveBond"}, {12, nullptr, "CancelBond"}, - {13, nullptr, "PinReply"}, - {14, nullptr, "SspReply"}, + {13, nullptr, "RespondToPinRequest"}, + {14, nullptr, "RespondToSspRequest"}, {15, nullptr, "GetEventInfo"}, {16, nullptr, "InitializeHid"}, - {17, nullptr, "HidConnect"}, - {18, nullptr, "HidDisconnect"}, - {19, nullptr, "HidSendData"}, - {20, nullptr, "HidSendData2"}, - {21, nullptr, "HidSetReport"}, - {22, nullptr, "HidGetReport"}, - {23, nullptr, "HidWakeController"}, - {24, nullptr, "HidAddPairedDevice"}, - {25, nullptr, "HidGetPairedDevice"}, - {26, nullptr, "CleanupHid"}, - {27, nullptr, "HidGetEventInfo"}, - {28, nullptr, "ExtSetTsi"}, - {29, nullptr, "ExtSetBurstMode"}, - {30, nullptr, "ExtSetZeroRetran"}, - {31, nullptr, "ExtSetMcMode"}, - {32, nullptr, "ExtStartLlrMode"}, - {33, nullptr, "ExtExitLlrMode"}, - {34, nullptr, "ExtSetRadio"}, - {35, nullptr, "ExtSetVisibility"}, - {36, nullptr, "ExtSetTbfcScan"}, + {17, nullptr, "OpenHidConnection"}, + {18, nullptr, "CloseHidConnection"}, + {19, nullptr, "WriteHidData"}, + {20, nullptr, "WriteHidData2"}, + {21, nullptr, "SetHidReport"}, + {22, nullptr, "GetHidReport"}, + {23, nullptr, "TriggerConnection"}, + {24, nullptr, "AddPairedDeviceInfo"}, + {25, nullptr, "GetPairedDeviceInfo"}, + {26, nullptr, "FinalizeHid"}, + {27, nullptr, "GetHidEventInfo"}, + {28, nullptr, "SetTsi"}, + {29, nullptr, "EnableBurstMode"}, + {30, nullptr, "SetZeroRetransmission"}, + {31, nullptr, "EnableMcMode"}, + {32, nullptr, "EnableLlrScan"}, + {33, nullptr, "DisableLlrScan"}, + {34, nullptr, "EnableRadio"}, + {35, nullptr, "SetVisibility"}, + {36, nullptr, "EnableTbfcScan"}, {37, nullptr, "RegisterHidReportEvent"}, - {38, nullptr, "HidGetReportEventInfo"}, + {38, nullptr, "GetHidReportEventInfo"}, {39, nullptr, "GetLatestPlr"}, - {40, nullptr, "ExtGetPendingConnections"}, + {40, nullptr, "GetPendingConnections"}, {41, nullptr, "GetChannelMap"}, - {42, nullptr, "EnableBluetoothBoostSetting"}, - {43, nullptr, "IsBluetoothBoostSettingEnabled"}, - {44, nullptr, "EnableBluetoothAfhSetting"}, - {45, nullptr, "IsBluetoothAfhSettingEnabled"}, - {46, nullptr, "InitializeBluetoothLe"}, - {47, nullptr, "EnableBluetoothLe"}, - {48, nullptr, "DisableBluetoothLe"}, - {49, nullptr, "CleanupBluetoothLe"}, - {50, nullptr, "SetLeVisibility"}, - {51, nullptr, "SetLeConnectionParameter"}, - {52, nullptr, "SetLeDefaultConnectionParameter"}, - {53, nullptr, "SetLeAdvertiseData"}, - {54, nullptr, "SetLeAdvertiseParameter"}, - {55, nullptr, "StartLeScan"}, - {56, nullptr, "StopLeScan"}, - {57, nullptr, "AddLeScanFilterCondition"}, - {58, nullptr, "DeleteLeScanFilterCondition"}, - {59, nullptr, "DeleteLeScanFilter"}, - {60, nullptr, "ClearLeScanFilters"}, - {61, nullptr, "EnableLeScanFilter"}, - {62, nullptr, "RegisterLeClient"}, - {63, nullptr, "UnregisterLeClient"}, - {64, nullptr, "UnregisterLeClientAll"}, - {65, nullptr, "LeClientConnect"}, - {66, nullptr, "LeClientCancelConnection"}, - {67, nullptr, "LeClientDisconnect"}, - {68, nullptr, "LeClientGetAttributes"}, - {69, nullptr, "LeClientDiscoverService"}, - {70, nullptr, "LeClientConfigureMtu"}, - {71, nullptr, "RegisterLeServer"}, - {72, nullptr, "UnregisterLeServer"}, - {73, nullptr, "LeServerConnect"}, - {74, nullptr, "LeServerDisconnect"}, - {75, nullptr, "CreateLeService"}, - {76, nullptr, "StartLeService"}, - {77, nullptr, "AddLeCharacteristic"}, - {78, nullptr, "AddLeDescriptor"}, - {79, nullptr, "GetLeCoreEventInfo"}, - {80, nullptr, "LeGetFirstCharacteristic"}, - {81, nullptr, "LeGetNextCharacteristic"}, - {82, nullptr, "LeGetFirstDescriptor"}, - {83, nullptr, "LeGetNextDescriptor"}, - {84, nullptr, "RegisterLeCoreDataPath"}, - {85, nullptr, "UnregisterLeCoreDataPath"}, - {86, nullptr, "RegisterLeHidDataPath"}, - {87, nullptr, "UnregisterLeHidDataPath"}, - {88, nullptr, "RegisterLeDataPath"}, - {89, nullptr, "UnregisterLeDataPath"}, - {90, nullptr, "LeClientReadCharacteristic"}, - {91, nullptr, "LeClientReadDescriptor"}, - {92, nullptr, "LeClientWriteCharacteristic"}, - {93, nullptr, "LeClientWriteDescriptor"}, - {94, nullptr, "LeClientRegisterNotification"}, - {95, nullptr, "LeClientDeregisterNotification"}, + {42, nullptr, "EnableTxPowerBoostSetting"}, + {43, nullptr, "IsTxPowerBoostSettingEnabled"}, + {44, nullptr, "EnableAfhSetting"}, + {45, nullptr, "IsAfhSettingEnabled"}, + {46, nullptr, "InitializeBle"}, + {47, nullptr, "EnableBle"}, + {48, nullptr, "DisableBle"}, + {49, nullptr, "FinalizeBle"}, + {50, nullptr, "SetBleVisibility"}, + {51, nullptr, "SetBleConnectionParameter"}, + {52, nullptr, "SetBleDefaultConnectionParameter"}, + {53, nullptr, "SetBleAdvertiseData"}, + {54, nullptr, "SetBleAdvertiseParameter"}, + {55, nullptr, "StartBleScan"}, + {56, nullptr, "StopBleScan"}, + {57, nullptr, "AddBleScanFilterCondition"}, + {58, nullptr, "DeleteBleScanFilterCondition"}, + {59, nullptr, "DeleteBleScanFilter"}, + {60, nullptr, "ClearBleScanFilters"}, + {61, nullptr, "EnableBleScanFilter"}, + {62, nullptr, "RegisterGattClient"}, + {63, nullptr, "UnregisterGattClient"}, + {64, nullptr, "UnregisterAllGattClients"}, + {65, nullptr, "ConnectGattServer"}, + {66, nullptr, "CancelConnectGattServer"}, + {67, nullptr, "DisconnectGattServer"}, + {68, nullptr, "GetGattAttribute"}, + {69, nullptr, "GetGattService"}, + {70, nullptr, "ConfigureAttMtu"}, + {71, nullptr, "RegisterGattServer"}, + {72, nullptr, "UnregisterGattServer"}, + {73, nullptr, "ConnectGattClient"}, + {74, nullptr, "DisconnectGattClient"}, + {75, nullptr, "AddGattService"}, + {76, nullptr, "EnableGattService"}, + {77, nullptr, "AddGattCharacteristic"}, + {78, nullptr, "AddGattDescriptor"}, + {79, nullptr, "GetBleManagedEventInfo"}, + {80, nullptr, "GetGattFirstCharacteristic"}, + {81, nullptr, "GetGattNextCharacteristic"}, + {82, nullptr, "GetGattFirstDescriptor"}, + {83, nullptr, "GetGattNextDescriptor"}, + {84, nullptr, "RegisterGattManagedDataPath"}, + {85, nullptr, "UnregisterGattManagedDataPath"}, + {86, nullptr, "RegisterGattHidDataPath"}, + {87, nullptr, "UnregisterGattHidDataPath"}, + {88, nullptr, "RegisterGattDataPath"}, + {89, nullptr, "UnregisterGattDataPath"}, + {90, nullptr, "ReadGattCharacteristic"}, + {91, nullptr, "ReadGattDescriptor"}, + {92, nullptr, "WriteGattCharacteristic"}, + {93, nullptr, "WriteGattDescriptor"}, + {94, nullptr, "RegisterGattNotification"}, + {95, nullptr, "UnregisterGattNotification"}, {96, nullptr, "GetLeHidEventInfo"}, {97, nullptr, "RegisterBleHidEvent"}, - {98, nullptr, "SetLeScanParameter"}, - {256, nullptr, "GetIsManufacturingMode"}, + {98, nullptr, "SetBleScanParameter"}, + {99, nullptr, "MoveToSecondaryPiconet"}, + {256, nullptr, "IsManufacturingMode"}, {257, nullptr, "EmulateBluetoothCrash"}, {258, nullptr, "GetBleChannelMap"}, }; diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp index 251b3c9df..0d251c6d0 100644 --- a/src/core/hle/service/btm/btm.cpp +++ b/src/core/hle/service/btm/btm.cpp @@ -132,66 +132,71 @@ public: explicit BTM() : ServiceFramework{"btm"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "Unknown1"}, - {1, nullptr, "Unknown2"}, - {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"}, - {3, nullptr, "Unknown3"}, - {4, nullptr, "Unknown4"}, - {5, nullptr, "Unknown5"}, - {6, nullptr, "Unknown6"}, - {7, nullptr, "Unknown7"}, - {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"}, - {9, nullptr, "Unknown8"}, - {10, nullptr, "Unknown9"}, - {11, nullptr, "Unknown10"}, - {12, nullptr, "Unknown11"}, - {13, nullptr, "Unknown12"}, + {0, nullptr, "GetState"}, + {1, nullptr, "GetHostDeviceProperty"}, + {2, nullptr, "AcquireDeviceConditionEvent"}, + {3, nullptr, "GetDeviceCondition"}, + {4, nullptr, "SetBurstMode"}, + {5, nullptr, "SetSlotMode"}, + {6, nullptr, "SetBluetoothMode"}, + {7, nullptr, "SetWlanMode"}, + {8, nullptr, "AcquireDeviceInfoEvent"}, + {9, nullptr, "GetDeviceInfo"}, + {10, nullptr, "AddDeviceInfo"}, + {11, nullptr, "RemoveDeviceInfo"}, + {12, nullptr, "IncreaseDeviceInfoOrder"}, + {13, nullptr, "LlrNotify"}, {14, nullptr, "EnableRadio"}, {15, nullptr, "DisableRadio"}, - {16, nullptr, "Unknown13"}, - {17, nullptr, "Unknown14"}, - {18, nullptr, "Unknown15"}, - {19, nullptr, "Unknown16"}, - {20, nullptr, "Unknown17"}, - {21, nullptr, "Unknown18"}, - {22, nullptr, "Unknown19"}, - {23, nullptr, "Unknown20"}, - {24, nullptr, "Unknown21"}, - {25, nullptr, "Unknown22"}, - {26, nullptr, "Unknown23"}, - {27, nullptr, "Unknown24"}, - {28, nullptr, "Unknown25"}, - {29, nullptr, "Unknown26"}, - {30, nullptr, "Unknown27"}, - {31, nullptr, "Unknown28"}, - {32, nullptr, "Unknown29"}, - {33, nullptr, "Unknown30"}, - {34, nullptr, "Unknown31"}, - {35, nullptr, "Unknown32"}, - {36, nullptr, "Unknown33"}, - {37, nullptr, "Unknown34"}, - {38, nullptr, "Unknown35"}, - {39, nullptr, "Unknown36"}, - {40, nullptr, "Unknown37"}, - {41, nullptr, "Unknown38"}, - {42, nullptr, "Unknown39"}, - {43, nullptr, "Unknown40"}, - {44, nullptr, "Unknown41"}, - {45, nullptr, "Unknown42"}, - {46, nullptr, "Unknown43"}, - {47, nullptr, "Unknown44"}, - {48, nullptr, "Unknown45"}, - {49, nullptr, "Unknown46"}, - {50, nullptr, "Unknown47"}, - {51, nullptr, "Unknown48"}, - {52, nullptr, "Unknown49"}, - {53, nullptr, "Unknown50"}, - {54, nullptr, "Unknown51"}, - {55, nullptr, "Unknown52"}, - {56, nullptr, "Unknown53"}, - {57, nullptr, "Unknown54"}, - {58, nullptr, "Unknown55"}, - {59, nullptr, "Unknown56"}, + {16, nullptr, "HidDisconnect"}, + {17, nullptr, "HidSetRetransmissionMode"}, + {18, nullptr, "AcquireAwakeReqEvent"}, + {19, nullptr, "AcquireLlrStateEvent"}, + {20, nullptr, "IsLlrStarted"}, + {21, nullptr, "EnableSlotSaving"}, + {22, nullptr, "ProtectDeviceInfo"}, + {23, nullptr, "AcquireBleScanEvent"}, + {24, nullptr, "GetBleScanParameterGeneral"}, + {25, nullptr, "GetBleScanParameterSmartDevice"}, + {26, nullptr, "StartBleScanForGeneral"}, + {27, nullptr, "StopBleScanForGeneral"}, + {28, nullptr, "GetBleScanResultsForGeneral"}, + {29, nullptr, "StartBleScanForPairedDevice"}, + {30, nullptr, "StopBleScanForPairedDevice"}, + {31, nullptr, "StartBleScanForSmartDevice"}, + {32, nullptr, "StopBleScanForSmartDevice"}, + {33, nullptr, "GetBleScanResultsForSmartDevice"}, + {34, nullptr, "AcquireBleConnectionEvent"}, + {35, nullptr, "BleConnect"}, + {36, nullptr, "BleOverrideConnection"}, + {37, nullptr, "BleDisconnect"}, + {38, nullptr, "BleGetConnectionState"}, + {39, nullptr, "BleGetGattClientConditionList"}, + {40, nullptr, "AcquireBlePairingEvent"}, + {41, nullptr, "BlePairDevice"}, + {42, nullptr, "BleUnpairDeviceOnBoth"}, + {43, nullptr, "BleUnpairDevice"}, + {44, nullptr, "BleGetPairedAddresses"}, + {45, nullptr, "AcquireBleServiceDiscoveryEvent"}, + {46, nullptr, "GetGattServices"}, + {47, nullptr, "GetGattService"}, + {48, nullptr, "GetGattIncludedServices"}, + {49, nullptr, "GetBelongingService"}, + {50, nullptr, "GetGattCharacteristics"}, + {51, nullptr, "GetGattDescriptors"}, + {52, nullptr, "AcquireBleMtuConfigEvent"}, + {53, nullptr, "ConfigureBleMtu"}, + {54, nullptr, "GetBleMtu"}, + {55, nullptr, "RegisterBleGattDataPath"}, + {56, nullptr, "UnregisterBleGattDataPath"}, + {57, nullptr, "RegisterAppletResourceUserId"}, + {58, nullptr, "UnregisterAppletResourceUserId"}, + {59, nullptr, "SetAppletResourceUserId"}, + {60, nullptr, "Unknown60"}, + {61, nullptr, "Unknown61"}, + {62, nullptr, "Unknown62"}, + {63, nullptr, "Unknown63"}, + {64, nullptr, "Unknown64"}, }; // clang-format on @@ -204,19 +209,19 @@ public: explicit BTM_DBG() : ServiceFramework{"btm:dbg"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "RegisterSystemEventForDiscovery"}, - {1, nullptr, "Unknown1"}, - {2, nullptr, "Unknown2"}, - {3, nullptr, "Unknown3"}, - {4, nullptr, "Unknown4"}, - {5, nullptr, "Unknown5"}, - {6, nullptr, "Unknown6"}, - {7, nullptr, "Unknown7"}, - {8, nullptr, "Unknown8"}, - {9, nullptr, "Unknown9"}, - {10, nullptr, "Unknown10"}, - {11, nullptr, "Unknown11"}, - {12, nullptr, "Unknown11"}, + {0, nullptr, "AcquireDiscoveryEvent"}, + {1, nullptr, "StartDiscovery"}, + {2, nullptr, "CancelDiscovery"}, + {3, nullptr, "GetDeviceProperty"}, + {4, nullptr, "CreateBond"}, + {5, nullptr, "CancelBond"}, + {6, nullptr, "SetTsiMode"}, + {7, nullptr, "GeneralTest"}, + {8, nullptr, "HidConnect"}, + {9, nullptr, "GeneralGet"}, + {10, nullptr, "GetGattClientDisconnectionReason"}, + {11, nullptr, "GetBleConnectionParameter"}, + {12, nullptr, "GetBleConnectionParameterRequest"}, }; // clang-format on diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp index 26c8a7081..ba5749b84 100644 --- a/src/core/hle/service/caps/caps.cpp +++ b/src/core/hle/service/caps/caps.cpp @@ -1,4 +1,4 @@ -// Copyright 2018 yuzu emulator team +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h index fc70a4c27..b8c67b6e2 100644 --- a/src/core/hle/service/caps/caps.h +++ b/src/core/hle/service/caps/caps.h @@ -1,4 +1,4 @@ -// Copyright 2018 yuzu emulator team +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -12,73 +12,79 @@ class ServiceManager; namespace Service::Capture { -enum AlbumImageOrientation { +enum class AlbumImageOrientation { Orientation0 = 0, Orientation1 = 1, Orientation2 = 2, Orientation3 = 3, }; -enum AlbumReportOption { +enum class AlbumReportOption { Disable = 0, Enable = 1, }; -enum ContentType : u8 { +enum class ContentType : u8 { Screenshot = 0, Movie = 1, ExtraMovie = 3, }; -enum AlbumStorage : u8 { +enum class AlbumStorage : u8 { NAND = 0, SD = 1, }; struct AlbumFileDateTime { - u16 year; - u8 month; - u8 day; - u8 hour; - u8 minute; - u8 second; - u8 uid; + s16 year{}; + s8 month{}; + s8 day{}; + s8 hour{}; + s8 minute{}; + s8 second{}; + s8 uid{}; }; +static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size."); struct AlbumEntry { - u64 size; - u64 application_id; - AlbumFileDateTime datetime; - AlbumStorage storage; - ContentType content; - u8 padding[6]; + u64 size{}; + u64 application_id{}; + AlbumFileDateTime datetime{}; + AlbumStorage storage{}; + ContentType content{}; + INSERT_PADDING_BYTES(6); }; +static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size."); struct AlbumFileEntry { - u64 size; - u64 hash; - AlbumFileDateTime datetime; - AlbumStorage storage; - ContentType content; - u8 padding[5]; - u8 unknown; + u64 size{}; // Size of the entry + u64 hash{}; // AES256 with hardcoded key over AlbumEntry + AlbumFileDateTime datetime{}; + AlbumStorage storage{}; + ContentType content{}; + INSERT_PADDING_BYTES(5); + u8 unknown{1}; // Set to 1 on official SW }; +static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size."); struct ApplicationAlbumEntry { - u64 size; - u64 hash; - AlbumFileDateTime datetime; - AlbumStorage storage; - ContentType content; - u8 padding[5]; - u8 unknown; + u64 size{}; // Size of the entry + u64 hash{}; // AES256 with hardcoded key over AlbumEntry + AlbumFileDateTime datetime{}; + AlbumStorage storage{}; + ContentType content{}; + INSERT_PADDING_BYTES(5); + u8 unknown{1}; // Set to 1 on official SW }; +static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size."); struct ApplicationAlbumFileEntry { - ApplicationAlbumEntry entry; - AlbumFileDateTime datetime; - u64 unknown; + ApplicationAlbumEntry entry{}; + AlbumFileDateTime datetime{}; + u64 unknown{}; }; +static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30, + "ApplicationAlbumFileEntry has incorrect size."); /// Registers all Capture services with the specified service manager. void InstallInterfaces(SM::ServiceManager& sm); diff --git a/src/core/hle/service/caps/caps_a.cpp b/src/core/hle/service/caps/caps_a.cpp index 88a3fdc05..a0a3b2ae3 100644 --- a/src/core/hle/service/caps/caps_a.cpp +++ b/src/core/hle/service/caps/caps_a.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_a.h b/src/core/hle/service/caps/caps_a.h index 8de832491..cb93aad5b 100644 --- a/src/core/hle/service/caps/caps_a.h +++ b/src/core/hle/service/caps/caps_a.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_c.cpp b/src/core/hle/service/caps/caps_c.cpp index ea6452ffa..ab17a187e 100644 --- a/src/core/hle/service/caps/caps_c.cpp +++ b/src/core/hle/service/caps/caps_c.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_c.h b/src/core/hle/service/caps/caps_c.h index d07cdb441..a9d028689 100644 --- a/src/core/hle/service/caps/caps_c.h +++ b/src/core/hle/service/caps/caps_c.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_sc.cpp b/src/core/hle/service/caps/caps_sc.cpp index d01a8a58e..822ee96c8 100644 --- a/src/core/hle/service/caps/caps_sc.cpp +++ b/src/core/hle/service/caps/caps_sc.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_sc.h b/src/core/hle/service/caps/caps_sc.h index 9ba372f7a..ac3e929ca 100644 --- a/src/core/hle/service/caps/caps_sc.h +++ b/src/core/hle/service/caps/caps_sc.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_ss.cpp b/src/core/hle/service/caps/caps_ss.cpp index eaa3a7494..24dc716e7 100644 --- a/src/core/hle/service/caps/caps_ss.cpp +++ b/src/core/hle/service/caps/caps_ss.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_ss.h b/src/core/hle/service/caps/caps_ss.h index e258a6925..450686e4f 100644 --- a/src/core/hle/service/caps/caps_ss.h +++ b/src/core/hle/service/caps/caps_ss.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_su.cpp b/src/core/hle/service/caps/caps_su.cpp index e8b0698e8..fffb2ecf9 100644 --- a/src/core/hle/service/caps/caps_su.cpp +++ b/src/core/hle/service/caps/caps_su.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_su.h b/src/core/hle/service/caps/caps_su.h index c494d7c84..62c9603a9 100644 --- a/src/core/hle/service/caps/caps_su.h +++ b/src/core/hle/service/caps/caps_su.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_u.cpp b/src/core/hle/service/caps/caps_u.cpp index 78bab6ed8..f36d8de2d 100644 --- a/src/core/hle/service/caps/caps_u.cpp +++ b/src/core/hle/service/caps/caps_u.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -58,19 +58,25 @@ void CAPS_U::GetAlbumContentsFileListForApplication(Kernel::HLERequestContext& c // u8 ContentType, two s64s, and an u64 AppletResourceUserId. Returns an output u64 for total // output entries (which is copied to a s32 by official SW). IPC::RequestParser rp{ctx}; - [[maybe_unused]] const auto application_album_file_entries = rp.PopRaw<std::array<u8, 0x30>>(); - const auto pid = rp.Pop<s32>(); - const auto content_type = rp.PopRaw<ContentType>(); - [[maybe_unused]] const auto start_datetime = rp.PopRaw<AlbumFileDateTime>(); - [[maybe_unused]] const auto end_datetime = rp.PopRaw<AlbumFileDateTime>(); - const auto applet_resource_user_id = rp.Pop<u64>(); + const auto pid{rp.Pop<s32>()}; + const auto content_type{rp.PopEnum<ContentType>()}; + const auto start_posix_time{rp.Pop<s64>()}; + const auto end_posix_time{rp.Pop<s64>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + // TODO: Update this when we implement the album. + // Currently we do not have a method of accessing album entries, set this to 0 for now. + constexpr s32 total_entries{0}; + LOG_WARNING(Service_Capture, - "(STUBBED) called. pid={}, content_type={}, applet_resource_user_id={}", pid, - content_type, applet_resource_user_id); + "(STUBBED) called. pid={}, content_type={}, start_posix_time={}, " + "end_posix_time={}, applet_resource_user_id={}, total_entries={}", + pid, content_type, start_posix_time, end_posix_time, applet_resource_user_id, + total_entries); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<s32>(0); + rb.Push(total_entries); } } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_u.h b/src/core/hle/service/caps/caps_u.h index e6e0716ff..689364de4 100644 --- a/src/core/hle/service/caps/caps_u.h +++ b/src/core/hle/service/caps/caps_u.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index f8e9df4b1..a41c73c48 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp @@ -27,8 +27,8 @@ public: {8, &ETicket::GetTitleKey, "GetTitleKey"}, {9, &ETicket::CountCommonTicket, "CountCommonTicket"}, {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"}, - {11, &ETicket::ListCommonTicket, "ListCommonTicket"}, - {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"}, + {11, &ETicket::ListCommonTicketRightsIds, "ListCommonTicketRightsIds"}, + {12, &ETicket::ListPersonalizedTicketRightsIds, "ListPersonalizedTicketRightsIds"}, {13, nullptr, "ListMissingPersonalizedTicket"}, {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"}, {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"}, @@ -55,7 +55,46 @@ public: {36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"}, {37, nullptr, "OwnTicket2"}, {38, nullptr, "OwnTicket3"}, + {501, nullptr, "Unknown501"}, + {502, nullptr, "Unknown502"}, {503, nullptr, "GetTitleKey"}, + {504, nullptr, "Unknown504"}, + {508, nullptr, "Unknown508"}, + {509, nullptr, "Unknown509"}, + {510, nullptr, "Unknown510"}, + {511, nullptr, "Unknown511"}, + {1001, nullptr, "Unknown1001"}, + {1002, nullptr, "Unknown1001"}, + {1003, nullptr, "Unknown1003"}, + {1004, nullptr, "Unknown1004"}, + {1005, nullptr, "Unknown1005"}, + {1006, nullptr, "Unknown1006"}, + {1007, nullptr, "Unknown1007"}, + {1009, nullptr, "Unknown1009"}, + {1010, nullptr, "Unknown1010"}, + {1011, nullptr, "Unknown1011"}, + {1012, nullptr, "Unknown1012"}, + {1013, nullptr, "Unknown1013"}, + {1014, nullptr, "Unknown1014"}, + {1015, nullptr, "Unknown1015"}, + {1016, nullptr, "Unknown1016"}, + {1017, nullptr, "Unknown1017"}, + {1018, nullptr, "Unknown1018"}, + {1019, nullptr, "Unknown1019"}, + {1020, nullptr, "Unknown1020"}, + {1021, nullptr, "Unknown1021"}, + {1501, nullptr, "Unknown1501"}, + {1502, nullptr, "Unknown1502"}, + {1503, nullptr, "Unknown1503"}, + {1504, nullptr, "Unknown1504"}, + {1505, nullptr, "Unknown1505"}, + {2000, nullptr, "Unknown2000"}, + {2001, nullptr, "Unknown2001"}, + {2100, nullptr, "Unknown2100"}, + {2501, nullptr, "Unknown2501"}, + {2502, nullptr, "Unknown2502"}, + {3001, nullptr, "Unknown3001"}, + {3002, nullptr, "Unknown3002"}, }; // clang-format on RegisterHandlers(functions); @@ -147,7 +186,7 @@ private: rb.Push<u32>(count); } - void ListCommonTicket(Kernel::HLERequestContext& ctx) { + void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) { u32 out_entries; if (keys.GetCommonTickets().empty()) out_entries = 0; @@ -170,7 +209,7 @@ private: rb.Push<u32>(out_entries); } - void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) { + void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) { u32 out_entries; if (keys.GetPersonalizedTickets().empty()) out_entries = 0; @@ -263,7 +302,7 @@ private: rb.Push<u64>(write_size); } - Core::Crypto::KeyManager keys; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); }; void InstallInterfaces(SM::ServiceManager& service_manager) { diff --git a/src/core/hle/service/eupld/eupld.cpp b/src/core/hle/service/eupld/eupld.cpp index 2df30acee..0d6d244f4 100644 --- a/src/core/hle/service/eupld/eupld.cpp +++ b/src/core/hle/service/eupld/eupld.cpp @@ -19,6 +19,7 @@ public: {1, nullptr, "ImportCrt"}, {2, nullptr, "ImportPki"}, {3, nullptr, "SetAutoUpload"}, + {4, nullptr, "GetAutoUpload"}, }; // clang-format on diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index 68f259b70..b7adaffc7 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -25,9 +25,13 @@ public: {10101, &IFriendService::GetFriendList, "GetFriendList"}, {10102, nullptr, "UpdateFriendInfo"}, {10110, nullptr, "GetFriendProfileImage"}, + {10120, nullptr, "Unknown10120"}, + {10121, nullptr, "Unknown10121"}, {10200, nullptr, "SendFriendRequestForApplication"}, {10211, nullptr, "AddFacedFriendRequestForApplication"}, {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"}, + {10420, nullptr, "Unknown10420"}, + {10421, nullptr, "Unknown10421"}, {10500, nullptr, "GetProfileList"}, {10600, nullptr, "DeclareOpenOnlinePlaySession"}, {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, @@ -97,6 +101,8 @@ public: {30900, nullptr, "SendFriendInvitation"}, {30910, nullptr, "ReadFriendInvitation"}, {30911, nullptr, "ReadAllFriendInvitations"}, + {40100, nullptr, "Unknown40100"}, + {40400, nullptr, "Unknown40400"}, {49900, nullptr, "DeleteNetworkServiceAccountCache"}, }; // clang-format on diff --git a/src/core/hle/service/grc/grc.cpp b/src/core/hle/service/grc/grc.cpp index 24910ac6c..401e0b208 100644 --- a/src/core/hle/service/grc/grc.cpp +++ b/src/core/hle/service/grc/grc.cpp @@ -17,6 +17,9 @@ public: static const FunctionInfo functions[] = { {1, nullptr, "OpenContinuousRecorder"}, {2, nullptr, "OpenGameMovieTrimmer"}, + {3, nullptr, "OpenOffscreenRecorder"}, + {101, nullptr, "CreateMovieMaker"}, + {9903, nullptr, "SetOffscreenRecordingMarker"} }; // clang-format on diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp index 1f2131ec8..cb35919e9 100644 --- a/src/core/hle/service/hid/controllers/debug_pad.cpp +++ b/src/core/hle/service/hid/controllers/debug_pad.cpp @@ -23,7 +23,7 @@ void Controller_DebugPad::OnRelease() {} void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp index 6e990dd00..b7b7bfeae 100644 --- a/src/core/hle/service/hid/controllers/gesture.cpp +++ b/src/core/hle/service/hid/controllers/gesture.cpp @@ -19,7 +19,7 @@ void Controller_Gesture::OnRelease() {} void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp index 9a8d354ba..feae89525 100644 --- a/src/core/hle/service/hid/controllers/keyboard.cpp +++ b/src/core/hle/service/hid/controllers/keyboard.cpp @@ -21,7 +21,7 @@ void Controller_Keyboard::OnRelease() {} void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp index 93d88ea50..ac40989c5 100644 --- a/src/core/hle/service/hid/controllers/mouse.cpp +++ b/src/core/hle/service/hid/controllers/mouse.cpp @@ -19,7 +19,7 @@ void Controller_Mouse::OnRelease() {} void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index c55d900e2..ef67ad690 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -328,7 +328,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* const auto& last_entry = main_controller->npad[main_controller->common.last_entry_index]; - main_controller->common.timestamp = core_timing.GetTicks(); + main_controller->common.timestamp = core_timing.GetCPUTicks(); main_controller->common.last_entry_index = (main_controller->common.last_entry_index + 1) % 17; @@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) { connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; } +void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) { + gyroscope_zero_drift_mode = drift_mode; +} + +Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const { + return gyroscope_zero_drift_mode; +} + void Controller_NPad::StartLRAssignmentMode() { // Nothing internally is used for lr assignment mode. Since we have the ability to set the // controller types from boot, it doesn't really matter about showing a selection screen diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 931f03430..5d4c58a43 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -58,6 +58,12 @@ public: }; static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size"); + enum class GyroscopeZeroDriftMode : u32 { + Loose = 0, + Standard = 1, + Tight = 2, + }; + enum class NpadHoldType : u64 { Vertical = 0, Horizontal = 1, @@ -117,6 +123,8 @@ public: void ConnectNPad(u32 npad_id); void DisconnectNPad(u32 npad_id); + void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode); + GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const; LedPattern GetLedPattern(u32 npad_id); void SetVibrationEnabled(bool can_vibrate); bool IsVibrationEnabled() const; @@ -324,8 +332,8 @@ private: std::array<Kernel::EventPair, 10> styleset_changed_events; Vibration last_processed_vibration{}; std::array<ControllerHolder, 10> connected_controllers{}; + GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard}; bool can_controllers_vibrate{true}; - std::array<ControllerPad, 10> npad_pad_states{}; bool is_in_lr_assignment_mode{false}; Core::System& system; diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp index 9e527d176..e7483bfa2 100644 --- a/src/core/hle/service/hid/controllers/stubbed.cpp +++ b/src/core/hle/service/hid/controllers/stubbed.cpp @@ -23,7 +23,7 @@ void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u } CommonHeader header{}; - header.timestamp = core_timing.GetTicks(); + header.timestamp = core_timing.GetCPUTicks(); header.total_entry_count = 17; header.entry_count = 0; header.last_entry_index = 0; diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp index 1c6e55566..e326f8f5c 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.cpp +++ b/src/core/hle/service/hid/controllers/touchscreen.cpp @@ -22,7 +22,7 @@ void Controller_Touchscreen::OnRelease() {} void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { @@ -49,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; - const u64 tick = core_timing.GetTicks(); + const u64 tick = core_timing.GetCPUTicks(); touch_entry.delta_time = tick - last_touch; last_touch = tick; touch_entry.finger = Settings::values.touchscreen.finger; diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp index 27511b27b..2503ef241 100644 --- a/src/core/hle/service/hid/controllers/xpad.cpp +++ b/src/core/hle/service/hid/controllers/xpad.cpp @@ -20,7 +20,7 @@ void Controller_XPad::OnRelease() {} void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { for (auto& xpad_entry : shared_memory.shared_memory_entries) { - xpad_entry.header.timestamp = core_timing.GetTicks(); + xpad_entry.header.timestamp = core_timing.GetCPUTicks(); xpad_entry.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index c84cb1483..e9020e0dc 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -39,11 +39,9 @@ namespace Service::HID { // Updating period for each HID device. // TODO(ogniK): Find actual polling rate of hid -constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66); -[[maybe_unused]] constexpr s64 accelerometer_update_ticks = - static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); -[[maybe_unused]] constexpr s64 gyroscope_update_ticks = - static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); +constexpr s64 pad_update_ticks = static_cast<s64>(1000000000 / 66); +[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast<s64>(1000000000 / 100); +[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast<s64>(1000000000 / 100); constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; IAppletResource::IAppletResource(Core::System& system) @@ -78,8 +76,8 @@ IAppletResource::IAppletResource(Core::System& system) // Register update callbacks pad_update_event = - Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) { - UpdateControllers(userdata, cycles_late); + Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) { + UpdateControllers(userdata, ns_late); }); // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) @@ -109,7 +107,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { rb.PushCopyObjects(shared_mem); } -void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) { +void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) { auto& core_timing = system.CoreTiming(); const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); @@ -120,7 +118,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) { controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE); } - core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); + core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event); } class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { @@ -161,7 +159,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {40, nullptr, "AcquireXpadIdEventHandle"}, {41, nullptr, "ReleaseXpadIdEventHandle"}, {51, &Hid::ActivateXpad, "ActivateXpad"}, - {55, nullptr, "GetXpadIds"}, + {55, &Hid::GetXpadIDs, "GetXpadIds"}, {56, nullptr, "ActivateJoyXpad"}, {58, nullptr, "GetJoyXpadLifoHandle"}, {59, nullptr, "GetJoyXpadIds"}, @@ -185,8 +183,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {77, nullptr, "GetAccelerometerPlayMode"}, {78, nullptr, "ResetAccelerometerPlayMode"}, {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"}, - {80, nullptr, "GetGyroscopeZeroDriftMode"}, - {81, nullptr, "ResetGyroscopeZeroDriftMode"}, + {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"}, + {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"}, {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"}, {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"}, {91, &Hid::ActivateGesture, "ActivateGesture"}, @@ -230,15 +228,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {211, nullptr, "IsVibrationDeviceMounted"}, {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"}, {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, - {302, nullptr, "StopConsoleSixAxisSensor"}, - {303, nullptr, "ActivateSevenSixAxisSensor"}, - {304, nullptr, "StartSevenSixAxisSensor"}, + {302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"}, + {303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"}, + {304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"}, {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"}, {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"}, - {307, nullptr, "FinalizeSevenSixAxisSensor"}, + {307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"}, {308, nullptr, "SetSevenSixAxisSensorFusionStrength"}, {309, nullptr, "GetSevenSixAxisSensorFusionStrength"}, - {310, nullptr, "ResetSevenSixAxisSensorTimestamp"}, + {310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"}, {400, nullptr, "IsUsbFullKeyControllerEnabled"}, {401, nullptr, "EnableUsbFullKeyController"}, {402, nullptr, "IsUsbFullKeyControllerConnected"}, @@ -319,6 +317,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(0); +} + void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto applet_resource_user_id{rp.Pop<u64>()}; @@ -363,6 +372,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto flags{rp.Pop<u32>()}; + LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto unknown{rp.Pop<u32>()}; @@ -402,15 +420,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto handle{rp.Pop<u32>()}; const auto drift_mode{rp.Pop<u32>()}; const auto applet_resource_user_id{rp.Pop<u64>()}; - LOG_WARNING(Service_HID, - "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, - drift_mode, applet_resource_user_id); + applet_resource->GetController<Controller_NPad>(HidController::NPad) + .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode}); + + LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, + drift_mode, applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>( + static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad) + .GetGyroscopeZeroDriftMode())); +} + +void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + applet_resource->GetController<Controller_NPad>(HidController::NPad) + .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard); + + LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -821,33 +883,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } -void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { +void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; - LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle); + LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } -void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { +void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto applet_resource_user_id{rp.Pop<u64>()}; - const auto unknown{rp.Pop<u32>()}; - LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}", - applet_resource_user_id, unknown); + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } -void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { +void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - const auto unknown{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; - LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown); + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -871,10 +935,46 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } -void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { +void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - const auto flags{rp.Pop<u32>()}; - LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags); + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + const auto is_palma_all_connectable{rp.Pop<bool>()}; + + LOG_WARNING(Service_HID, + "(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}", + applet_resource_user_id, is_palma_all_connectable); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto palma_boost_mode{rp.Pop<bool>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index c8ed4ad8b..6fb048360 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -86,14 +86,19 @@ public: private: void CreateAppletResource(Kernel::HLERequestContext& ctx); void ActivateXpad(Kernel::HLERequestContext& ctx); + void GetXpadIDs(Kernel::HLERequestContext& ctx); void ActivateDebugPad(Kernel::HLERequestContext& ctx); void ActivateTouchScreen(Kernel::HLERequestContext& ctx); void ActivateMouse(Kernel::HLERequestContext& ctx); void ActivateKeyboard(Kernel::HLERequestContext& ctx); + void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); void ActivateGesture(Kernel::HLERequestContext& ctx); void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx); void StartSixAxisSensor(Kernel::HLERequestContext& ctx); + void StopSixAxisSensor(Kernel::HLERequestContext& ctx); void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); + void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); + void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx); void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); @@ -125,12 +130,15 @@ private: void IsVibrationPermitted(Kernel::HLERequestContext& ctx); void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); - void StopSixAxisSensor(Kernel::HLERequestContext& ctx); - void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); - void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); + void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); + void ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx); + void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx); void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx); void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); - void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); + void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); + void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx); + void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); + void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); std::shared_ptr<IAppletResource> applet_resource; Core::System& system; diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index 36ed6f7da..e82fd031b 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 5}; rb.Push(RESULT_SUCCESS); - rb.PushRaw<u64>(system.CoreTiming().GetTicks()); + rb.PushRaw<u64>(system.CoreTiming().GetCPUTicks()); rb.PushRaw<u32>(0); } diff --git a/src/core/hle/service/lbl/lbl.cpp b/src/core/hle/service/lbl/lbl.cpp index e8f9f2d29..17350b403 100644 --- a/src/core/hle/service/lbl/lbl.cpp +++ b/src/core/hle/service/lbl/lbl.cpp @@ -47,6 +47,7 @@ public: {26, &LBL::EnableVrMode, "EnableVrMode"}, {27, &LBL::DisableVrMode, "DisableVrMode"}, {28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"}, + {29, nullptr, "IsAutoBrightnessControlSupported"}, }; // clang-format on diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index 92adde6d4..49972cd69 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -69,6 +69,7 @@ public: {101, nullptr, "GetNetworkInfoLatestUpdate"}, {102, nullptr, "Scan"}, {103, nullptr, "ScanPrivate"}, + {104, nullptr, "SetWirelessControllerRestriction"}, {200, nullptr, "OpenAccessPoint"}, {201, nullptr, "CloseAccessPoint"}, {202, nullptr, "CreateNetwork"}, diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 6ad3be1b3..64a526b9e 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -39,42 +39,61 @@ constexpr ResultCode ERROR_NOT_INITIALIZED{ErrorModule::Loader, 87}; constexpr std::size_t MAXIMUM_LOADED_RO{0x40}; constexpr std::size_t MAXIMUM_MAP_RETRIES{0x200}; +constexpr std::size_t TEXT_INDEX{0}; +constexpr std::size_t RO_INDEX{1}; +constexpr std::size_t DATA_INDEX{2}; + +struct NRRCertification { + u64_le application_id_mask; + u64_le application_id_pattern; + INSERT_PADDING_BYTES(0x10); + std::array<u8, 0x100> public_key; // Also known as modulus + std::array<u8, 0x100> signature; +}; +static_assert(sizeof(NRRCertification) == 0x220, "NRRCertification has invalid size."); + struct NRRHeader { u32_le magic; - INSERT_PADDING_BYTES(12); - u64_le title_id_mask; - u64_le title_id_pattern; - INSERT_PADDING_BYTES(16); - std::array<u8, 0x100> modulus; - std::array<u8, 0x100> signature_1; - std::array<u8, 0x100> signature_2; - u64_le title_id; + u32_le certification_signature_key_generation; // 9.0.0+ + INSERT_PADDING_WORDS(2); + NRRCertification certification; + std::array<u8, 0x100> signature; + u64_le application_id; u32_le size; - INSERT_PADDING_BYTES(4); + u8 nrr_kind; // 7.0.0+ + INSERT_PADDING_BYTES(3); u32_le hash_offset; u32_le hash_count; - INSERT_PADDING_BYTES(8); + INSERT_PADDING_WORDS(2); +}; +static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has invalid size."); + +struct SegmentHeader { + u32_le memory_offset; + u32_le memory_size; }; -static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has incorrect size."); +static_assert(sizeof(SegmentHeader) == 0x8, "SegmentHeader has invalid size."); struct NROHeader { + // Switchbrew calls this "Start" (0x10) INSERT_PADDING_WORDS(1); u32_le mod_offset; INSERT_PADDING_WORDS(2); + + // Switchbrew calls this "Header" (0x70) u32_le magic; u32_le version; u32_le nro_size; u32_le flags; - u32_le text_offset; - u32_le text_size; - u32_le ro_offset; - u32_le ro_size; - u32_le rw_offset; - u32_le rw_size; + // .text, .ro, .data + std::array<SegmentHeader, 3> segment_headers; u32_le bss_size; INSERT_PADDING_WORDS(1); std::array<u8, 0x20> build_id; - INSERT_PADDING_BYTES(0x20); + u32_le dso_handle_offset; + INSERT_PADDING_WORDS(1); + // .apiInfo, .dynstr, .dynsym + std::array<SegmentHeader, 3> segment_headers_2; }; static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size."); @@ -91,6 +110,7 @@ struct NROInfo { std::size_t data_size{}; VAddr src_addr{}; }; +static_assert(sizeof(NROInfo) == 0x60, "NROInfo has invalid size."); class DebugMonitor final : public ServiceFramework<DebugMonitor> { public: @@ -226,11 +246,11 @@ public: return; } - if (system.CurrentProcess()->GetTitleID() != header.title_id) { + if (system.CurrentProcess()->GetTitleID() != header.application_id) { LOG_ERROR(Service_LDR, "Attempting to load NRR with title ID other than current process. (actual " "{:016X})!", - header.title_id); + header.application_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERROR_INVALID_NRR); return; @@ -348,10 +368,10 @@ public: ResultCode LoadNro(Kernel::Process* process, const NROHeader& nro_header, VAddr nro_addr, VAddr start) const { - const VAddr text_start{start + nro_header.text_offset}; - const VAddr ro_start{start + nro_header.ro_offset}; - const VAddr data_start{start + nro_header.rw_offset}; - const VAddr bss_start{data_start + nro_header.rw_size}; + const VAddr text_start{start + nro_header.segment_headers[TEXT_INDEX].memory_offset}; + const VAddr ro_start{start + nro_header.segment_headers[RO_INDEX].memory_offset}; + const VAddr data_start{start + nro_header.segment_headers[DATA_INDEX].memory_offset}; + const VAddr bss_start{data_start + nro_header.segment_headers[DATA_INDEX].memory_size}; const VAddr bss_end_addr{ Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)}; @@ -360,9 +380,12 @@ public: system.Memory().ReadBlock(src_addr, source_data.data(), source_data.size()); system.Memory().WriteBlock(dst_addr, source_data.data(), source_data.size()); }}; - CopyCode(nro_addr + nro_header.text_offset, text_start, nro_header.text_size); - CopyCode(nro_addr + nro_header.ro_offset, ro_start, nro_header.ro_size); - CopyCode(nro_addr + nro_header.rw_offset, data_start, nro_header.rw_size); + CopyCode(nro_addr + nro_header.segment_headers[TEXT_INDEX].memory_offset, text_start, + nro_header.segment_headers[TEXT_INDEX].memory_size); + CopyCode(nro_addr + nro_header.segment_headers[RO_INDEX].memory_offset, ro_start, + nro_header.segment_headers[RO_INDEX].memory_size); + CopyCode(nro_addr + nro_header.segment_headers[DATA_INDEX].memory_offset, data_start, + nro_header.segment_headers[DATA_INDEX].memory_size); CASCADE_CODE(process->PageTable().SetCodeMemoryPermission( text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute)); @@ -484,9 +507,11 @@ public: } // Track the loaded NRO - nro.insert_or_assign(*map_result, NROInfo{hash, *map_result, nro_size, bss_address, - bss_size, header.text_size, header.ro_size, - header.rw_size, nro_address}); + nro.insert_or_assign(*map_result, + NROInfo{hash, *map_result, nro_size, bss_address, bss_size, + header.segment_headers[TEXT_INDEX].memory_size, + header.segment_headers[RO_INDEX].memory_size, + header.segment_headers[DATA_INDEX].memory_size, nro_address}); // Invalidate JIT caches for the newly mapped process code system.InvalidateCpuInstructionCaches(); @@ -584,11 +609,21 @@ private: static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) { return header.magic == Common::MakeMagic('N', 'R', 'O', '0') && header.nro_size == nro_size && header.bss_size == bss_size && - header.ro_offset == header.text_offset + header.text_size && - header.rw_offset == header.ro_offset + header.ro_size && - nro_size == header.rw_offset + header.rw_size && - Common::Is4KBAligned(header.text_size) && Common::Is4KBAligned(header.ro_size) && - Common::Is4KBAligned(header.rw_size); + + header.segment_headers[RO_INDEX].memory_offset == + header.segment_headers[TEXT_INDEX].memory_offset + + header.segment_headers[TEXT_INDEX].memory_size && + + header.segment_headers[DATA_INDEX].memory_offset == + header.segment_headers[RO_INDEX].memory_offset + + header.segment_headers[RO_INDEX].memory_size && + + nro_size == header.segment_headers[DATA_INDEX].memory_offset + + header.segment_headers[DATA_INDEX].memory_size && + + Common::Is4KBAligned(header.segment_headers[TEXT_INDEX].memory_size) && + Common::Is4KBAligned(header.segment_headers[RO_INDEX].memory_size) && + Common::Is4KBAligned(header.segment_headers[DATA_INDEX].memory_size); } Core::System& system; }; diff --git a/src/core/hle/service/lm/manager.cpp b/src/core/hle/service/lm/manager.cpp index b67081b86..3ee2374e7 100644 --- a/src/core/hle/service/lm/manager.cpp +++ b/src/core/hle/service/lm/manager.cpp @@ -86,7 +86,8 @@ std::string FormatField(Field type, const std::vector<u8>& data) { return Common::StringFromFixedZeroTerminatedBuffer( reinterpret_cast<const char*>(data.data()), data.size()); default: - UNIMPLEMENTED(); + UNIMPLEMENTED_MSG("Unimplemented field type={}", type); + return ""; } } diff --git a/src/core/hle/service/mig/mig.cpp b/src/core/hle/service/mig/mig.cpp index d16367f2c..113a4665c 100644 --- a/src/core/hle/service/mig/mig.cpp +++ b/src/core/hle/service/mig/mig.cpp @@ -20,6 +20,12 @@ public: {101, nullptr, "ResumeServer"}, {200, nullptr, "CreateClient"}, {201, nullptr, "ResumeClient"}, + {1001, nullptr, "Unknown1001"}, + {1010, nullptr, "Unknown1010"}, + {1100, nullptr, "Unknown1100"}, + {1101, nullptr, "Unknown1101"}, + {1200, nullptr, "Unknown1200"}, + {1201, nullptr, "Unknown1201"} }; // clang-format on diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp index def63dc8a..25c24e537 100644 --- a/src/core/hle/service/mm/mm_u.cpp +++ b/src/core/hle/service/mm/mm_u.cpp @@ -14,14 +14,14 @@ public: explicit MM_U() : ServiceFramework{"mm:u"} { // clang-format off static const FunctionInfo functions[] = { - {0, &MM_U::Initialize, "Initialize"}, - {1, &MM_U::Finalize, "Finalize"}, - {2, &MM_U::SetAndWait, "SetAndWait"}, - {3, &MM_U::Get, "Get"}, - {4, &MM_U::InitializeWithId, "InitializeWithId"}, - {5, &MM_U::FinalizeWithId, "FinalizeWithId"}, - {6, &MM_U::SetAndWaitWithId, "SetAndWaitWithId"}, - {7, &MM_U::GetWithId, "GetWithId"}, + {0, &MM_U::InitializeOld, "InitializeOld"}, + {1, &MM_U::FinalizeOld, "FinalizeOld"}, + {2, &MM_U::SetAndWaitOld, "SetAndWaitOld"}, + {3, &MM_U::GetOld, "GetOld"}, + {4, &MM_U::Initialize, "Initialize"}, + {5, &MM_U::Finalize, "Finalize"}, + {6, &MM_U::SetAndWait, "SetAndWait"}, + {7, &MM_U::Get, "Get"}, }; // clang-format on @@ -29,21 +29,21 @@ public: } private: - void Initialize(Kernel::HLERequestContext& ctx) { + void InitializeOld(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_MM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } - void Finalize(Kernel::HLERequestContext& ctx) { + void FinalizeOld(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_MM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } - void SetAndWait(Kernel::HLERequestContext& ctx) { + void SetAndWaitOld(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; min = rp.Pop<u32>(); max = rp.Pop<u32>(); @@ -54,7 +54,7 @@ private: rb.Push(RESULT_SUCCESS); } - void Get(Kernel::HLERequestContext& ctx) { + void GetOld(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_MM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 3}; @@ -62,7 +62,7 @@ private: rb.Push(current); } - void InitializeWithId(Kernel::HLERequestContext& ctx) { + void Initialize(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_MM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 3}; @@ -70,14 +70,14 @@ private: rb.Push<u32>(id); // Any non zero value } - void FinalizeWithId(Kernel::HLERequestContext& ctx) { + void Finalize(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_MM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } - void SetAndWaitWithId(Kernel::HLERequestContext& ctx) { + void SetAndWait(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; u32 input_id = rp.Pop<u32>(); min = rp.Pop<u32>(); @@ -90,7 +90,7 @@ private: rb.Push(RESULT_SUCCESS); } - void GetWithId(Kernel::HLERequestContext& ctx) { + void Get(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_MM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp index ec9aae04a..e38dea1f4 100644 --- a/src/core/hle/service/ncm/ncm.cpp +++ b/src/core/hle/service/ncm/ncm.cpp @@ -28,16 +28,16 @@ public: {7, nullptr, "ResolveApplicationLegalInformationPath"}, {8, nullptr, "RedirectApplicationLegalInformationPath"}, {9, nullptr, "Refresh"}, - {10, nullptr, "RedirectProgramPath2"}, - {11, nullptr, "Refresh2"}, - {12, nullptr, "DeleteProgramPath"}, - {13, nullptr, "DeleteApplicationControlPath"}, - {14, nullptr, "DeleteApplicationHtmlDocumentPath"}, - {15, nullptr, "DeleteApplicationLegalInformationPath"}, - {16, nullptr, ""}, - {17, nullptr, ""}, - {18, nullptr, ""}, - {19, nullptr, ""}, + {10, nullptr, "RedirectApplicationProgramPath"}, + {11, nullptr, "ClearApplicationRedirection"}, + {12, nullptr, "EraseProgramRedirection"}, + {13, nullptr, "EraseApplicationControlRedirection"}, + {14, nullptr, "EraseApplicationHtmlDocumentRedirection"}, + {15, nullptr, "EraseApplicationLegalInformationRedirection"}, + {16, nullptr, "ResolveProgramPathForDebug"}, + {17, nullptr, "RedirectProgramPathForDebug"}, + {18, nullptr, "RedirectApplicationProgramPathForDebug"}, + {19, nullptr, "EraseProgramRedirectionForDebug"}, }; // clang-format on diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp index b7b34ce7e..780ea30fe 100644 --- a/src/core/hle/service/nfc/nfc.cpp +++ b/src/core/hle/service/nfc/nfc.cpp @@ -198,9 +198,9 @@ public: static const FunctionInfo functions[] = { {0, nullptr, "Initialize"}, {1, nullptr, "Finalize"}, - {2, nullptr, "GetState"}, - {3, nullptr, "IsNfcEnabled"}, - {100, nullptr, "SetNfcEnabled"}, + {2, nullptr, "GetStateOld"}, + {3, nullptr, "IsNfcEnabledOld"}, + {100, nullptr, "SetNfcEnabledOld"}, {400, nullptr, "InitializeSystem"}, {401, nullptr, "FinalizeSystem"}, {402, nullptr, "GetState"}, diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp index 3e4dd2f7a..886450be2 100644 --- a/src/core/hle/service/ns/ns.cpp +++ b/src/core/hle/service/ns/ns.cpp @@ -366,7 +366,8 @@ ResultVal<u8> IApplicationManagerInterface::GetApplicationDesiredLanguage( LOG_DEBUG(Service_NS, "called with supported_languages={:08X}", supported_languages); // Get language code from settings - const auto language_code = Set::GetLanguageCodeFromIndex(Settings::values.language_index); + const auto language_code = + Set::GetLanguageCodeFromIndex(Settings::values.language_index.GetValue()); // Convert to application language, get priority list const auto application_language = ConvertToApplicationLanguage(language_code); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index cc2192e5c..fba89e7a6 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, case IoctlCommand::IocGetCharacteristicsCommand: return GetCharacteristics(input, output, output2, version); case IoctlCommand::IocGetTPCMasksCommand: - return GetTPCMasks(input, output); + return GetTPCMasks(input, output, output2, version); case IoctlCommand::IocGetActiveSlotMaskCommand: return GetActiveSlotMask(input, output); case IoctlCommand::IocZcullGetCtxSizeCommand: @@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto return 0; } -u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) { +u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, + std::vector<u8>& output2, IoctlVersion version) { IoctlGpuGetTpcMasksArgs params{}; std::memcpy(¶ms, input.data(), input.size()); - LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size, - params.mask_buf_addr); - // TODO(ogniK): Confirm value on hardware - if (params.mask_buf_size) - params.tpc_mask_size = 4 * 1; // 4 * num_gpc - else - params.tpc_mask_size = 0; - std::memcpy(output.data(), ¶ms, sizeof(params)); + LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); + if (params.mask_buffer_size != 0) { + params.tcp_mask = 3; + } + + if (version == IoctlVersion::Version3) { + std::memcpy(output.data(), input.data(), output.size()); + std::memcpy(output2.data(), ¶ms.tcp_mask, output2.size()); + } else { + std::memcpy(output.data(), ¶ms, output.size()); + } + return 0; } @@ -195,8 +200,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o IoctlGetGpuTime params{}; std::memcpy(¶ms, input.data(), input.size()); - const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); - params.gpu_time = static_cast<u64_le>(ns.count()); + params.gpu_time = static_cast<u64_le>(system.CoreTiming().GetGlobalTimeNs().count()); std::memcpy(output.data(), ¶ms, output.size()); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index 07b644ec5..ef60f72ce 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h @@ -92,16 +92,11 @@ private: "IoctlCharacteristics is incorrect size"); struct IoctlGpuGetTpcMasksArgs { - /// [in] TPC mask buffer size reserved by userspace. Should be at least - /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC. - /// [out] full kernel buffer size - u32_le mask_buf_size; - u32_le reserved; - - /// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if - /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0. - u64_le mask_buf_addr; - u64_le tpc_mask_size; // Nintendo add this? + u32_le mask_buffer_size{}; + INSERT_PADDING_WORDS(1); + u64_le mask_buffer_address{}; + u32_le tcp_mask{}; + INSERT_PADDING_WORDS(1); }; static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24, "IoctlGpuGetTpcMasksArgs is incorrect size"); @@ -166,7 +161,8 @@ private: u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2, IoctlVersion version); - u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output); + u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2, + IoctlVersion version); u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output); u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output); u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output); diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 437bc5dee..2f44d3779 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -9,6 +9,7 @@ #include "common/logging/log.h" #include "common/microprofile.h" #include "common/scope_exit.h" +#include "common/thread.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" @@ -27,8 +28,35 @@ namespace Service::NVFlinger { -constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); -constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30); +constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60); +constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30); + +void NVFlinger::VSyncThread(NVFlinger& nv_flinger) { + nv_flinger.SplitVSync(); +} + +void NVFlinger::SplitVSync() { + system.RegisterHostThread(); + std::string name = "yuzu:VSyncThread"; + MicroProfileOnThreadCreate(name.c_str()); + Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + s64 delay = 0; + while (is_running) { + guard->lock(); + const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count(); + Compose(); + const auto ticks = GetNextTicks(); + const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count(); + const s64 time_passed = time_end - time_start; + const s64 next_time = std::max<s64>(0, ticks - time_passed - delay); + guard->unlock(); + if (next_time > 0) { + wait_event->WaitFor(std::chrono::nanoseconds{next_time}); + } + delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time; + } +} NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(0, "Default", system); @@ -36,22 +64,36 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(2, "Edid", system); displays.emplace_back(3, "Internal", system); displays.emplace_back(4, "Null", system); + guard = std::make_shared<std::mutex>(); // Schedule the screen composition events composition_event = - Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) { + Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) { + Lock(); Compose(); - const auto ticks = - Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks(); - this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), + const auto ticks = GetNextTicks(); + this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late), composition_event); }); - - system.CoreTiming().ScheduleEvent(frame_ticks, composition_event); + if (system.IsMulticore()) { + is_running = true; + wait_event = std::make_unique<Common::Event>(); + vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this)); + } else { + system.CoreTiming().ScheduleEvent(frame_ticks, composition_event); + } } NVFlinger::~NVFlinger() { - system.CoreTiming().UnscheduleEvent(composition_event, 0); + if (system.IsMulticore()) { + is_running = false; + wait_event->Set(); + vsync_thread->join(); + vsync_thread.reset(); + wait_event.reset(); + } else { + system.CoreTiming().UnscheduleEvent(composition_event, 0); + } } void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { @@ -199,10 +241,12 @@ void NVFlinger::Compose() { auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; + guard->unlock(); for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { const auto& fence = multi_fence.fences[fence_id]; gpu.WaitFence(fence.id, fence.value); } + guard->lock(); MicroProfileFlip(); @@ -223,7 +267,7 @@ void NVFlinger::Compose() { s64 NVFlinger::GetNextTicks() const { constexpr s64 max_hertz = 120LL; - return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; + return (1000000000 * (1LL << swap_interval)) / max_hertz; } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 57a21f33b..e4959a9af 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -4,15 +4,22 @@ #pragma once +#include <atomic> #include <memory> +#include <mutex> #include <optional> #include <string> #include <string_view> +#include <thread> #include <vector> #include "common/common_types.h" #include "core/hle/kernel/object.h" +namespace Common { +class Event; +} // namespace Common + namespace Core::Timing { class CoreTiming; struct EventType; @@ -79,6 +86,10 @@ public: s64 GetNextTicks() const; + std::unique_lock<std::mutex> Lock() { + return std::unique_lock{*guard}; + } + private: /// Finds the display identified by the specified ID. VI::Display* FindDisplay(u64 display_id); @@ -92,6 +103,10 @@ private: /// Finds the layer identified by the specified ID in the desired display. const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const; + static void VSyncThread(NVFlinger& nv_flinger); + + void SplitVSync(); + std::shared_ptr<Nvidia::Module> nvdrv; std::vector<VI::Display> displays; @@ -108,7 +123,13 @@ private: /// Event that handles screen composition. std::shared_ptr<Core::Timing::EventType> composition_event; + std::shared_ptr<std::mutex> guard; + Core::System& system; + + std::unique_ptr<std::thread> vsync_thread; + std::unique_ptr<Common::Event> wait_event; + std::atomic<bool> is_running{}; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index 9d36ea0d0..cde3312da 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -80,8 +80,13 @@ private: const auto user_id = rp.PopRaw<u128>(); const auto process_id = rp.PopRaw<u64>(); std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)}; + if constexpr (Type == Core::Reporter::PlayReportType::Old2) { - data.emplace_back(ctx.ReadBuffer(1)); + const auto read_buffer_count = + ctx.BufferDescriptorX().size() + ctx.BufferDescriptorA().size(); + if (read_buffer_count > 1) { + data.emplace_back(ctx.ReadBuffer(1)); + } } LOG_DEBUG( diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index f3b4b286c..34fe2fd82 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <array> #include <chrono> #include "common/logging/log.h" #include "core/hle/ipc_helpers.h" @@ -31,6 +32,44 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{ LanguageCode::ZH_HANT, }}; +enum class KeyboardLayout : u64 { + Japanese = 0, + EnglishUs = 1, + EnglishUsInternational = 2, + EnglishUk = 3, + French = 4, + FrenchCa = 5, + Spanish = 6, + SpanishLatin = 7, + German = 8, + Italian = 9, + Portuguese = 10, + Russian = 11, + Korean = 12, + ChineseSimplified = 13, + ChineseTraditional = 14, +}; + +constexpr std::array<std::pair<LanguageCode, KeyboardLayout>, 17> language_to_layout{{ + {LanguageCode::JA, KeyboardLayout::Japanese}, + {LanguageCode::EN_US, KeyboardLayout::EnglishUs}, + {LanguageCode::FR, KeyboardLayout::French}, + {LanguageCode::DE, KeyboardLayout::German}, + {LanguageCode::IT, KeyboardLayout::Italian}, + {LanguageCode::ES, KeyboardLayout::Spanish}, + {LanguageCode::ZH_CN, KeyboardLayout::ChineseSimplified}, + {LanguageCode::KO, KeyboardLayout::Korean}, + {LanguageCode::NL, KeyboardLayout::EnglishUsInternational}, + {LanguageCode::PT, KeyboardLayout::Portuguese}, + {LanguageCode::RU, KeyboardLayout::Russian}, + {LanguageCode::ZH_TW, KeyboardLayout::ChineseTraditional}, + {LanguageCode::EN_GB, KeyboardLayout::EnglishUk}, + {LanguageCode::FR_CA, KeyboardLayout::FrenchCa}, + {LanguageCode::ES_419, KeyboardLayout::SpanishLatin}, + {LanguageCode::ZH_HANS, KeyboardLayout::ChineseSimplified}, + {LanguageCode::ZH_HANT, KeyboardLayout::ChineseTraditional}, +}}; + constexpr std::size_t pre4_0_0_max_entries = 15; constexpr std::size_t post4_0_0_max_entries = 17; @@ -50,6 +89,25 @@ void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t m ctx.WriteBuffer(available_language_codes.data(), copy_size); PushResponseLanguageCode(ctx, copy_amount); } + +void GetKeyCodeMapImpl(Kernel::HLERequestContext& ctx) { + const auto language_code = available_language_codes[Settings::values.language_index.GetValue()]; + const auto key_code = + std::find_if(language_to_layout.cbegin(), language_to_layout.cend(), + [=](const auto& element) { return element.first == language_code; }); + KeyboardLayout layout = KeyboardLayout::EnglishUs; + if (key_code == language_to_layout.cend()) { + LOG_ERROR(Service_SET, + "Could not find keyboard layout for language index {}, defaulting to English us", + Settings::values.language_index.GetValue()); + } else { + layout = key_code->second; + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + ctx.WriteBuffer(&layout, sizeof(KeyboardLayout)); +} } // Anonymous namespace LanguageCode GetLanguageCodeFromIndex(std::size_t index) { @@ -105,11 +163,11 @@ void SET::GetQuestFlag(Kernel::HLERequestContext& ctx) { } void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { - LOG_DEBUG(Service_SET, "called {}", Settings::values.language_index); + LOG_DEBUG(Service_SET, "called {}", Settings::values.language_index.GetValue()); IPC::ResponseBuilder rb{ctx, 4}; rb.Push(RESULT_SUCCESS); - rb.PushEnum(available_language_codes[Settings::values.language_index]); + rb.PushEnum(available_language_codes[Settings::values.language_index.GetValue()]); } void SET::GetRegionCode(Kernel::HLERequestContext& ctx) { @@ -117,7 +175,17 @@ void SET::GetRegionCode(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push(Settings::values.region_index); + rb.Push(Settings::values.region_index.GetValue()); +} + +void SET::GetKeyCodeMap(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_SET, "Called {}", ctx.Description()); + GetKeyCodeMapImpl(ctx); +} + +void SET::GetKeyCodeMap2(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_SET, "Called {}", ctx.Description()); + GetKeyCodeMapImpl(ctx); } SET::SET() : ServiceFramework("set") { @@ -130,9 +198,9 @@ SET::SET() : ServiceFramework("set") { {4, &SET::GetRegionCode, "GetRegionCode"}, {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, - {7, nullptr, "GetKeyCodeMap"}, + {7, &SET::GetKeyCodeMap, "GetKeyCodeMap"}, {8, &SET::GetQuestFlag, "GetQuestFlag"}, - {9, nullptr, "GetKeyCodeMap2"}, + {9, &SET::GetKeyCodeMap2, "GetKeyCodeMap2"}, {10, nullptr, "GetFirmwareVersionForDebug"}, }; // clang-format on diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h index 6084b345d..8ac9c169d 100644 --- a/src/core/hle/service/set/set.h +++ b/src/core/hle/service/set/set.h @@ -44,6 +44,8 @@ private: void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); void GetQuestFlag(Kernel::HLERequestContext& ctx); void GetRegionCode(Kernel::HLERequestContext& ctx); + void GetKeyCodeMap(Kernel::HLERequestContext& ctx); + void GetKeyCodeMap2(Kernel::HLERequestContext& ctx); }; } // namespace Service::Set diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index 6ada13be4..d872de16c 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp @@ -142,7 +142,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) { } // Wake the threads waiting on the ServerPort - server_port->WakeupAllWaitingThreads(); + server_port->Signal(); LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId()); IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp index e724d4ab8..865ed3b91 100644 --- a/src/core/hle/service/spl/module.cpp +++ b/src/core/hle/service/spl/module.cpp @@ -19,7 +19,7 @@ namespace Service::SPL { Module::Interface::Interface(std::shared_ptr<Module> module, const char* name) : ServiceFramework(name), module(std::move(module)), - rng(Settings::values.rng_seed.value_or(std::time(nullptr))) {} + rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr))) {} Module::Interface::~Interface() = default; diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 1575f0b49..59a272f4a 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -11,9 +11,8 @@ namespace Service::Time::Clock { TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { - const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + const TimeSpanType ticks_time_span{ + TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 44d5bc651..8baaa2a6a 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -11,9 +11,8 @@ namespace Service::Time::Clock { SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { - const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + const TimeSpanType ticks_time_span{ + TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; return {ticks_time_span.ToSeconds(), GetClockSourceId()}; } diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index cc1dbd575..13e4b3818 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -241,9 +241,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { - const auto ticks{Clock::TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), + Core::Hardware::CNTFREQ)}; const s64 base_time_point{context.offset + current_time_point.time_point - ticks.ToSeconds()}; IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index 999ec1e51..e0ae9f874 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -30,8 +30,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system, const Common::UUID& clock_source_id, Clock::TimeSpanType current_time_point) { const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; const Clock::SteadyClockContext context{ static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), clock_source_id}; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 67b45e7c0..ea7b4ae13 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -511,6 +511,7 @@ private: LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, static_cast<u32>(transaction), flags); + nv_flinger->Lock(); auto& buffer_queue = nv_flinger->FindBufferQueue(id); switch (transaction) { @@ -518,9 +519,9 @@ private: IGBPConnectRequestParcel request{ctx.ReadBuffer()}; IGBPConnectResponseParcel response{ static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * - Settings::values.resolution_factor), + Settings::values.resolution_factor.GetValue()), static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * - Settings::values.resolution_factor)}; + Settings::values.resolution_factor.GetValue())}; ctx.WriteBuffer(response.Serialize()); break; } @@ -550,6 +551,7 @@ private: [=](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) { // Repeat TransactParcel DequeueBuffer when a buffer is available + nv_flinger->Lock(); auto& buffer_queue = nv_flinger->FindBufferQueue(id); auto result = buffer_queue.DequeueBuffer(width, height); ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); @@ -747,14 +749,14 @@ private: if (Settings::values.use_docked_mode) { rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); } else { rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); } rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games. @@ -1029,9 +1031,9 @@ private: // between docked and undocked dimensions. We take the liberty of applying // the resolution scaling factor here. rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) * - static_cast<u32>(Settings::values.resolution_factor)); + static_cast<u32>(Settings::values.resolution_factor.GetValue())); } void SetLayerScalingMode(Kernel::HLERequestContext& ctx) { @@ -1064,8 +1066,8 @@ private: LOG_WARNING(Service_VI, "(STUBBED) called"); DisplayInfo display_info; - display_info.width *= static_cast<u64>(Settings::values.resolution_factor); - display_info.height *= static_cast<u64>(Settings::values.resolution_factor); + display_info.width *= static_cast<u64>(Settings::values.resolution_factor.GetValue()); + display_info.height *= static_cast<u64>(Settings::values.resolution_factor.GetValue()); ctx.WriteBuffer(&display_info, sizeof(DisplayInfo)); IPC::ResponseBuilder rb{ctx, 4}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9d87045a0..2c5588933 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -8,6 +8,7 @@ #include <utility> #include "common/assert.h" +#include "common/atomic_ops.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/page_table.h" @@ -29,15 +30,12 @@ namespace Core::Memory { struct Memory::Impl { explicit Impl(Core::System& system_) : system{system_} {} - void SetCurrentPageTable(Kernel::Process& process) { + void SetCurrentPageTable(Kernel::Process& process, u32 core_id) { current_page_table = &process.PageTable().PageTableImpl(); const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); - system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); - system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); - system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width); - system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width); } void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { @@ -179,6 +177,22 @@ struct Memory::Impl { } } + bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) { + return WriteExclusive<u8>(addr, data, expected); + } + + bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) { + return WriteExclusive<u16_le>(addr, data, expected); + } + + bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) { + return WriteExclusive<u32_le>(addr, data, expected); + } + + bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) { + return WriteExclusive<u64_le>(addr, data, expected); + } + std::string ReadCString(VAddr vaddr, std::size_t max_length) { std::string string; string.reserve(max_length); @@ -534,9 +548,9 @@ struct Memory::Impl { // longer exist, and we should just leave the pagetable entry blank. page_type = Common::PageType::Unmapped; } else { - page_type = Common::PageType::Memory; current_page_table->pointers[vaddr >> PAGE_BITS] = pointer - (vaddr & ~PAGE_MASK); + page_type = Common::PageType::Memory; } break; } @@ -577,9 +591,12 @@ struct Memory::Impl { base + page_table.pointers.size()); if (!target) { + ASSERT_MSG(type != Common::PageType::Memory, + "Mapping memory page without a pointer @ {:016x}", base * PAGE_SIZE); + while (base != end) { - page_table.pointers[base] = nullptr; page_table.attributes[base] = type; + page_table.pointers[base] = nullptr; page_table.backing_addr[base] = 0; base += 1; @@ -682,6 +699,67 @@ struct Memory::Impl { } } + template <typename T> + bool WriteExclusive(const VAddr vaddr, const T data, const T expected) { + u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer != nullptr) { + // NOTE: Avoid adding any extra logic to this fast-path block + T volatile* pointer = reinterpret_cast<T volatile*>(&page_pointer[vaddr]); + return Common::AtomicCompareAndSwap(pointer, data, expected); + } + + const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + switch (type) { + case Common::PageType::Unmapped: + LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, + static_cast<u32>(data), vaddr); + return true; + case Common::PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); + break; + case Common::PageType::RasterizerCachedMemory: { + u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; + system.GPU().InvalidateRegion(vaddr, sizeof(T)); + T volatile* pointer = reinterpret_cast<T volatile*>(&host_ptr); + return Common::AtomicCompareAndSwap(pointer, data, expected); + break; + } + default: + UNREACHABLE(); + } + return true; + } + + bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) { + u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer != nullptr) { + // NOTE: Avoid adding any extra logic to this fast-path block + u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&page_pointer[vaddr]); + return Common::AtomicCompareAndSwap(pointer, data, expected); + } + + const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + switch (type) { + case Common::PageType::Unmapped: + LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, + static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr); + return true; + case Common::PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); + break; + case Common::PageType::RasterizerCachedMemory: { + u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; + system.GPU().InvalidateRegion(vaddr, sizeof(u128)); + u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&host_ptr); + return Common::AtomicCompareAndSwap(pointer, data, expected); + break; + } + default: + UNREACHABLE(); + } + return true; + } + Common::PageTable* current_page_table = nullptr; Core::System& system; }; @@ -689,8 +767,8 @@ struct Memory::Impl { Memory::Memory(Core::System& system) : impl{std::make_unique<Impl>(system)} {} Memory::~Memory() = default; -void Memory::SetCurrentPageTable(Kernel::Process& process) { - impl->SetCurrentPageTable(process); +void Memory::SetCurrentPageTable(Kernel::Process& process, u32 core_id) { + impl->SetCurrentPageTable(process, core_id); } void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { @@ -764,6 +842,26 @@ void Memory::Write64(VAddr addr, u64 data) { impl->Write64(addr, data); } +bool Memory::WriteExclusive8(VAddr addr, u8 data, u8 expected) { + return impl->WriteExclusive8(addr, data, expected); +} + +bool Memory::WriteExclusive16(VAddr addr, u16 data, u16 expected) { + return impl->WriteExclusive16(addr, data, expected); +} + +bool Memory::WriteExclusive32(VAddr addr, u32 data, u32 expected) { + return impl->WriteExclusive32(addr, data, expected); +} + +bool Memory::WriteExclusive64(VAddr addr, u64 data, u64 expected) { + return impl->WriteExclusive64(addr, data, expected); +} + +bool Memory::WriteExclusive128(VAddr addr, u128 data, u128 expected) { + return impl->WriteExclusive128(addr, data, expected); +} + std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) { return impl->ReadCString(vaddr, max_length); } diff --git a/src/core/memory.h b/src/core/memory.h index 9292f3b0a..4a1cc63f4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -64,7 +64,7 @@ public: * * @param process The process to use the page table of. */ - void SetCurrentPageTable(Kernel::Process& process); + void SetCurrentPageTable(Kernel::Process& process, u32 core_id); /** * Maps an allocated buffer onto a region of the emulated process address space. @@ -245,6 +245,71 @@ public: void Write64(VAddr addr, u64 data); /** + * Writes a 8-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 8-bit unsigned integer to. + * @param data The 8-bit unsigned integer to write to the given virtual address. + * @param expected The 8-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive8(VAddr addr, u8 data, u8 expected); + + /** + * Writes a 16-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 16-bit unsigned integer to. + * @param data The 16-bit unsigned integer to write to the given virtual address. + * @param expected The 16-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive16(VAddr addr, u16 data, u16 expected); + + /** + * Writes a 32-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 32-bit unsigned integer to. + * @param data The 32-bit unsigned integer to write to the given virtual address. + * @param expected The 32-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive32(VAddr addr, u32 data, u32 expected); + + /** + * Writes a 64-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 64-bit unsigned integer to. + * @param data The 64-bit unsigned integer to write to the given virtual address. + * @param expected The 64-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive64(VAddr addr, u64 data, u64 expected); + + /** + * Writes a 128-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 128-bit unsigned integer to. + * @param data The 128-bit unsigned integer to write to the given virtual address. + * @param expected The 128-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive128(VAddr addr, u128 data, u128 expected); + + /** * Reads a null-terminated string from the given virtual address. * This function will continually read characters until either: * diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index b139e8465..53d27859b 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -20,7 +20,7 @@ namespace Core::Memory { -constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12); +constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(1000000000 / 12); constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) @@ -190,7 +190,7 @@ CheatEngine::~CheatEngine() { void CheatEngine::Initialize() { event = Core::Timing::CreateEvent( "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id), - [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); + [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); }); core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event); metadata.process_id = system.CurrentProcess()->GetProcessID(); @@ -217,7 +217,7 @@ void CheatEngine::Reload(std::vector<CheatEntry> cheats) { MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70)); -void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { +void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) { if (is_pending_reload.exchange(false)) { vm.LoadProgram(cheats); } @@ -230,7 +230,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { vm.Execute(metadata); - core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event); + core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event); } } // namespace Core::Memory diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp index f1ae9d4df..29339ead7 100644 --- a/src/core/perf_stats.cpp +++ b/src/core/perf_stats.cpp @@ -119,13 +119,14 @@ double PerfStats::GetLastFrameTimeScale() { } void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) { - if (!Settings::values.use_frame_limit) { + if (!Settings::values.use_frame_limit.GetValue() || + Settings::values.use_multi_core.GetValue()) { return; } auto now = Clock::now(); - const double sleep_scale = Settings::values.frame_limit / 100.0; + const double sleep_scale = Settings::values.frame_limit.GetValue() / 100.0; // Max lag caused by slow frames. Shouldn't be more than the length of a frame at the current // speed percent or it will clamp too much and prevent this from properly limiting to that diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 4edff9cd8..d3886c4ec 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -62,6 +62,7 @@ const std::array<const char*, NumMouseButtons> mapping = {{ } Values values = {}; +bool configuring_global = true; std::string GetTimeZoneString() { static constexpr std::array<const char*, 46> timezones{{ @@ -73,9 +74,9 @@ std::string GetTimeZoneString() { "UCT", "Universal", "UTC", "W-SU", "WET", "Zulu", }}; - ASSERT(Settings::values.time_zone_index < timezones.size()); + ASSERT(Settings::values.time_zone_index.GetValue() < timezones.size()); - return timezones[Settings::values.time_zone_index]; + return timezones[Settings::values.time_zone_index.GetValue()]; } void Apply() { @@ -97,25 +98,25 @@ void LogSetting(const std::string& name, const T& value) { void LogSettings() { LOG_INFO(Config, "yuzu Configuration:"); - LogSetting("System_UseDockedMode", Settings::values.use_docked_mode); - LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); + LogSetting("Controls_UseDockedMode", Settings::values.use_docked_mode); + LogSetting("System_RngSeed", Settings::values.rng_seed.GetValue().value_or(0)); LogSetting("System_CurrentUser", Settings::values.current_user); - LogSetting("System_LanguageIndex", Settings::values.language_index); - LogSetting("System_RegionIndex", Settings::values.region_index); - LogSetting("System_TimeZoneIndex", Settings::values.time_zone_index); - LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); - LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); - LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); - LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); - LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); - LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy); + LogSetting("System_LanguageIndex", Settings::values.language_index.GetValue()); + LogSetting("System_RegionIndex", Settings::values.region_index.GetValue()); + LogSetting("System_TimeZoneIndex", Settings::values.time_zone_index.GetValue()); + LogSetting("Core_UseMultiCore", Settings::values.use_multi_core.GetValue()); + LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor.GetValue()); + LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit.GetValue()); + LogSetting("Renderer_FrameLimit", Settings::values.frame_limit.GetValue()); + LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache.GetValue()); + LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy.GetValue()); LogSetting("Renderer_UseAsynchronousGpuEmulation", - Settings::values.use_asynchronous_gpu_emulation); - LogSetting("Renderer_UseVsync", Settings::values.use_vsync); - LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders); - LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy); + Settings::values.use_asynchronous_gpu_emulation.GetValue()); + LogSetting("Renderer_UseVsync", Settings::values.use_vsync.GetValue()); + LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); + LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy.GetValue()); LogSetting("Audio_OutputEngine", Settings::values.sink_id); - LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); + LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching.GetValue()); LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); LogSetting("DataStorage_UseVirtualSd", Settings::values.use_virtual_sd); LogSetting("DataStorage_NandDir", FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)); @@ -127,12 +128,60 @@ void LogSettings() { LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); } +float Volume() { + if (values.audio_muted) { + return 0.0f; + } + return values.volume.GetValue(); +} + bool IsGPULevelExtreme() { - return values.gpu_accuracy == GPUAccuracy::Extreme; + return values.gpu_accuracy.GetValue() == GPUAccuracy::Extreme; } bool IsGPULevelHigh() { - return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High; + return values.gpu_accuracy.GetValue() == GPUAccuracy::Extreme || + values.gpu_accuracy.GetValue() == GPUAccuracy::High; +} + +void RestoreGlobalState() { + // If a game is running, DO NOT restore the global settings state + if (Core::System::GetInstance().IsPoweredOn()) { + return; + } + + // Audio + values.enable_audio_stretching.SetGlobal(true); + values.volume.SetGlobal(true); + + // Core + values.use_multi_core.SetGlobal(true); + + // Renderer + values.renderer_backend.SetGlobal(true); + values.vulkan_device.SetGlobal(true); + values.aspect_ratio.SetGlobal(true); + values.max_anisotropy.SetGlobal(true); + values.use_frame_limit.SetGlobal(true); + values.frame_limit.SetGlobal(true); + values.use_disk_shader_cache.SetGlobal(true); + values.gpu_accuracy.SetGlobal(true); + values.use_asynchronous_gpu_emulation.SetGlobal(true); + values.use_vsync.SetGlobal(true); + values.use_assembly_shaders.SetGlobal(true); + values.use_fast_gpu_time.SetGlobal(true); + values.force_30fps_mode.SetGlobal(true); + values.bg_red.SetGlobal(true); + values.bg_green.SetGlobal(true); + values.bg_blue.SetGlobal(true); + + // System + values.language_index.SetGlobal(true); + values.region_index.SetGlobal(true); + values.time_zone_index.SetGlobal(true); + values.rng_seed.SetGlobal(true); + values.custom_rtc.SetGlobal(true); + values.sound_index.SetGlobal(true); } } // namespace Settings diff --git a/src/core/settings.h b/src/core/settings.h index 78eb33737..850ca4072 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -382,20 +382,85 @@ enum class GPUAccuracy : u32 { Extreme = 2, }; +extern bool configuring_global; + +template <typename Type> +class Setting final { +public: + Setting() = default; + explicit Setting(Type val) : global{val} {} + ~Setting() = default; + void SetGlobal(bool to_global) { + use_global = to_global; + } + bool UsingGlobal() const { + return use_global; + } + Type GetValue(bool need_global = false) const { + if (use_global || need_global) { + return global; + } + return local; + } + void SetValue(const Type& value) { + if (use_global) { + global = value; + } else { + local = value; + } + } + +private: + bool use_global = true; + Type global{}; + Type local{}; +}; + struct Values { + // Audio + std::string audio_device_id; + std::string sink_id; + bool audio_muted; + Setting<bool> enable_audio_stretching; + Setting<float> volume; + + // Core + Setting<bool> use_multi_core; + + // Renderer + Setting<RendererBackend> renderer_backend; + bool renderer_debug; + Setting<int> vulkan_device; + + Setting<u16> resolution_factor = Setting(static_cast<u16>(1)); + Setting<int> aspect_ratio; + Setting<int> max_anisotropy; + Setting<bool> use_frame_limit; + Setting<u16> frame_limit; + Setting<bool> use_disk_shader_cache; + Setting<GPUAccuracy> gpu_accuracy; + Setting<bool> use_asynchronous_gpu_emulation; + Setting<bool> use_vsync; + Setting<bool> use_assembly_shaders; + Setting<bool> force_30fps_mode; + Setting<bool> use_fast_gpu_time; + + Setting<float> bg_red; + Setting<float> bg_green; + Setting<float> bg_blue; + // System - bool use_docked_mode; - std::optional<u32> rng_seed; + Setting<std::optional<u32>> rng_seed; // Measured in seconds since epoch - std::optional<std::chrono::seconds> custom_rtc; + Setting<std::optional<std::chrono::seconds>> custom_rtc; // Set on game boot, reset on stop. Seconds difference between current time and `custom_rtc` std::chrono::seconds custom_rtc_differential; s32 current_user; - s32 language_index; - s32 region_index; - s32 time_zone_index; - s32 sound_index; + Setting<s32> language_index; + Setting<s32> region_index; + Setting<s32> time_zone_index; + Setting<s32> sound_index; // Controls std::array<PlayerInput, 10> players; @@ -419,8 +484,7 @@ struct Values { u16 udp_input_port; u8 udp_pad_index; - // Core - bool use_multi_core; + bool use_docked_mode; // Data Storage bool use_virtual_sd; @@ -432,38 +496,6 @@ struct Values { NANDUserSize nand_user_size; SDMCSize sdmc_size; - // Renderer - RendererBackend renderer_backend; - bool renderer_debug; - int vulkan_device; - - float resolution_factor; - int aspect_ratio; - int max_anisotropy; - bool use_frame_limit; - u16 frame_limit; - bool use_disk_shader_cache; - GPUAccuracy gpu_accuracy; - bool use_asynchronous_gpu_emulation; - bool use_vsync; - bool use_assembly_shaders; - bool force_30fps_mode; - bool use_fast_gpu_time; - - float bg_red; - float bg_green; - float bg_blue; - - std::string log_filter; - - bool use_dev_keys; - - // Audio - std::string sink_id; - bool enable_audio_stretching; - std::string audio_device_id; - float volume; - // Debugging bool record_frame_times; bool use_gdbstub; @@ -474,8 +506,13 @@ struct Values { bool reporting_services; bool quest_flag; bool disable_cpu_opt; + bool disable_macro_jit; - // BCAT + // Misceallaneous + std::string log_filter; + bool use_dev_keys; + + // Services std::string bcat_backend; bool bcat_boxcat_local; @@ -489,6 +526,8 @@ struct Values { std::map<u64, std::vector<std::string>> disabled_addons; } extern values; +float Volume(); + bool IsGPULevelExtreme(); bool IsGPULevelHigh(); @@ -497,4 +536,7 @@ std::string GetTimeZoneString(); void Apply(); void LogSettings(); +// Restore the global state of all applicable settings in the Values struct +void RestoreGlobalState(); + } // namespace Settings diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index c781b3cfc..78915e6db 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -189,19 +189,24 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { // Log user configuration information constexpr auto field_type = Telemetry::FieldType::UserConfig; AddField(field_type, "Audio_SinkId", Settings::values.sink_id); - AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); - AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core); - AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend)); - AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor); - AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); - AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); - AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); + AddField(field_type, "Audio_EnableAudioStretching", + Settings::values.enable_audio_stretching.GetValue()); + AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue()); + AddField(field_type, "Renderer_Backend", + TranslateRenderer(Settings::values.renderer_backend.GetValue())); + AddField(field_type, "Renderer_ResolutionFactor", + Settings::values.resolution_factor.GetValue()); + AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit.GetValue()); + AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit.GetValue()); + AddField(field_type, "Renderer_UseDiskShaderCache", + Settings::values.use_disk_shader_cache.GetValue()); AddField(field_type, "Renderer_GPUAccuracyLevel", - TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy)); + TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", - Settings::values.use_asynchronous_gpu_emulation); - AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); - AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders); + Settings::values.use_asynchronous_gpu_emulation.GetValue()); + AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); + AddField(field_type, "Renderer_UseAssemblyShaders", + Settings::values.use_assembly_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index b2c6c537e..8b0c50d11 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -14,7 +14,7 @@ namespace Tools { namespace { -constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); +constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(1000000000 / 60); u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) { switch (width) { @@ -57,7 +57,7 @@ Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& m : core_timing{core_timing_}, memory{memory_} { event = Core::Timing::CreateEvent( "MemoryFreezer::FrameCallback", - [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); + [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); }); core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event); } @@ -158,7 +158,7 @@ std::vector<Freezer::Entry> Freezer::GetEntries() const { return entries; } -void Freezer::FrameCallback(u64 userdata, s64 cycles_late) { +void Freezer::FrameCallback(u64 userdata, s64 ns_late) { if (!IsActive()) { LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events."); return; @@ -173,7 +173,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) { MemoryWriteWidth(memory, entry.width, entry.address, entry.value); } - core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event); + core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event); } void Freezer::FillEntryReads() { diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index a9c2392b1..3bd76dd23 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -7,6 +7,10 @@ add_library(input_common STATIC main.h motion_emu.cpp motion_emu.h + gcadapter/gc_adapter.cpp + gcadapter/gc_adapter.h + gcadapter/gc_poller.cpp + gcadapter/gc_poller.h sdl/sdl.cpp sdl/sdl.h udp/client.cpp @@ -26,5 +30,7 @@ if(SDL2_FOUND) target_compile_definitions(input_common PRIVATE HAVE_SDL2) endif() +target_link_libraries(input_common PUBLIC ${LIBUSB_LIBRARIES}) + create_target_directory_groups(input_common) target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost) diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp new file mode 100644 index 000000000..6d9f4d9eb --- /dev/null +++ b/src/input_common/gcadapter/gc_adapter.cpp @@ -0,0 +1,398 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include <chrono> +#include <thread> +#include "common/logging/log.h" +#include "input_common/gcadapter/gc_adapter.h" + +namespace GCAdapter { + +/// Used to loop through and assign button in poller +constexpr std::array<PadButton, 12> PadButtonArray{ + PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, PadButton::PAD_BUTTON_DOWN, + PadButton::PAD_BUTTON_UP, PadButton::PAD_TRIGGER_Z, PadButton::PAD_TRIGGER_R, + PadButton::PAD_TRIGGER_L, PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, + PadButton::PAD_BUTTON_X, PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_START, +}; + +Adapter::Adapter() { + if (usb_adapter_handle != nullptr) { + return; + } + LOG_INFO(Input, "GC Adapter Initialization started"); + + current_status = NO_ADAPTER_DETECTED; + + const int init_res = libusb_init(&libusb_ctx); + if (init_res == LIBUSB_SUCCESS) { + StartScanThread(); + } else { + LOG_ERROR(Input, "libusb could not be initialized. failed with error = {}", init_res); + } +} + +GCPadStatus Adapter::GetPadStatus(int port, const std::array<u8, 37>& adapter_payload) { + GCPadStatus pad = {}; + bool get_origin = false; + + ControllerTypes type = ControllerTypes(adapter_payload[1 + (9 * port)] >> 4); + if (type != ControllerTypes::None) { + get_origin = true; + } + + adapter_controllers_status[port] = type; + + static constexpr std::array<PadButton, 8> b1_buttons{ + PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, PadButton::PAD_BUTTON_X, + PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, + PadButton::PAD_BUTTON_DOWN, PadButton::PAD_BUTTON_UP, + }; + + static constexpr std::array<PadButton, 4> b2_buttons{ + PadButton::PAD_BUTTON_START, + PadButton::PAD_TRIGGER_Z, + PadButton::PAD_TRIGGER_R, + PadButton::PAD_TRIGGER_L, + }; + + if (adapter_controllers_status[port] != ControllerTypes::None) { + const u8 b1 = adapter_payload[1 + (9 * port) + 1]; + const u8 b2 = adapter_payload[1 + (9 * port) + 2]; + + for (std::size_t i = 0; i < b1_buttons.size(); ++i) { + if ((b1 & (1U << i)) != 0) { + pad.button |= static_cast<u16>(b1_buttons[i]); + } + } + + for (std::size_t j = 0; j < b2_buttons.size(); ++j) { + if ((b2 & (1U << j)) != 0) { + pad.button |= static_cast<u16>(b2_buttons[j]); + } + } + + if (get_origin) { + pad.button |= PAD_GET_ORIGIN; + } + + pad.stick_x = adapter_payload[1 + (9 * port) + 3]; + pad.stick_y = adapter_payload[1 + (9 * port) + 4]; + pad.substick_x = adapter_payload[1 + (9 * port) + 5]; + pad.substick_y = adapter_payload[1 + (9 * port) + 6]; + pad.trigger_left = adapter_payload[1 + (9 * port) + 7]; + pad.trigger_right = adapter_payload[1 + (9 * port) + 8]; + } + return pad; +} + +void Adapter::PadToState(const GCPadStatus& pad, GCState& state) { + for (const auto& button : PadButtonArray) { + const u16 button_value = static_cast<u16>(button); + state.buttons.insert_or_assign(button_value, pad.button & button_value); + } + + state.axes.insert_or_assign(static_cast<u8>(PadAxes::StickX), pad.stick_x); + state.axes.insert_or_assign(static_cast<u8>(PadAxes::StickY), pad.stick_y); + state.axes.insert_or_assign(static_cast<u8>(PadAxes::SubstickX), pad.substick_x); + state.axes.insert_or_assign(static_cast<u8>(PadAxes::SubstickY), pad.substick_y); + state.axes.insert_or_assign(static_cast<u8>(PadAxes::TriggerLeft), pad.trigger_left); + state.axes.insert_or_assign(static_cast<u8>(PadAxes::TriggerRight), pad.trigger_right); +} + +void Adapter::Read() { + LOG_DEBUG(Input, "GC Adapter Read() thread started"); + + int payload_size_in, payload_size_copy; + std::array<u8, 37> adapter_payload; + std::array<u8, 37> adapter_payload_copy; + std::array<GCPadStatus, 4> pads; + + while (adapter_thread_running) { + libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(), + sizeof(adapter_payload), &payload_size_in, 16); + payload_size_copy = 0; + // this mutex might be redundant? + { + std::lock_guard<std::mutex> lk(s_mutex); + std::copy(std::begin(adapter_payload), std::end(adapter_payload), + std::begin(adapter_payload_copy)); + payload_size_copy = payload_size_in; + } + + if (payload_size_copy != sizeof(adapter_payload_copy) || + adapter_payload_copy[0] != LIBUSB_DT_HID) { + LOG_ERROR(Input, "error reading payload (size: {}, type: {:02x})", payload_size_copy, + adapter_payload_copy[0]); + adapter_thread_running = false; // error reading from adapter, stop reading. + break; + } + for (std::size_t port = 0; port < pads.size(); ++port) { + pads[port] = GetPadStatus(port, adapter_payload_copy); + if (DeviceConnected(port) && configuring) { + if (pads[port].button != PAD_GET_ORIGIN) { + pad_queue[port].Push(pads[port]); + } + + // Accounting for a threshold here because of some controller variance + if (pads[port].stick_x > pads[port].MAIN_STICK_CENTER_X + pads[port].THRESHOLD || + pads[port].stick_x < pads[port].MAIN_STICK_CENTER_X - pads[port].THRESHOLD) { + pads[port].axis = GCAdapter::PadAxes::StickX; + pads[port].axis_value = pads[port].stick_x; + pad_queue[port].Push(pads[port]); + } + if (pads[port].stick_y > pads[port].MAIN_STICK_CENTER_Y + pads[port].THRESHOLD || + pads[port].stick_y < pads[port].MAIN_STICK_CENTER_Y - pads[port].THRESHOLD) { + pads[port].axis = GCAdapter::PadAxes::StickY; + pads[port].axis_value = pads[port].stick_y; + pad_queue[port].Push(pads[port]); + } + if (pads[port].substick_x > pads[port].C_STICK_CENTER_X + pads[port].THRESHOLD || + pads[port].substick_x < pads[port].C_STICK_CENTER_X - pads[port].THRESHOLD) { + pads[port].axis = GCAdapter::PadAxes::SubstickX; + pads[port].axis_value = pads[port].substick_x; + pad_queue[port].Push(pads[port]); + } + if (pads[port].substick_y > pads[port].C_STICK_CENTER_Y + pads[port].THRESHOLD || + pads[port].substick_y < pads[port].C_STICK_CENTER_Y - pads[port].THRESHOLD) { + pads[port].axis = GCAdapter::PadAxes::SubstickY; + pads[port].axis_value = pads[port].substick_y; + pad_queue[port].Push(pads[port]); + } + if (pads[port].trigger_left > pads[port].TRIGGER_THRESHOLD) { + pads[port].axis = GCAdapter::PadAxes::TriggerLeft; + pads[port].axis_value = pads[port].trigger_left; + pad_queue[port].Push(pads[port]); + } + if (pads[port].trigger_right > pads[port].TRIGGER_THRESHOLD) { + pads[port].axis = GCAdapter::PadAxes::TriggerRight; + pads[port].axis_value = pads[port].trigger_right; + pad_queue[port].Push(pads[port]); + } + } + PadToState(pads[port], state[port]); + } + std::this_thread::yield(); + } +} + +void Adapter::ScanThreadFunc() { + LOG_INFO(Input, "GC Adapter scanning thread started"); + + while (detect_thread_running) { + if (usb_adapter_handle == nullptr) { + std::lock_guard<std::mutex> lk(initialization_mutex); + Setup(); + } + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } +} + +void Adapter::StartScanThread() { + if (detect_thread_running) { + return; + } + if (!libusb_ctx) { + return; + } + + detect_thread_running = true; + detect_thread = std::thread([=] { ScanThreadFunc(); }); +} + +void Adapter::StopScanThread() { + detect_thread_running = false; + detect_thread.join(); +} + +void Adapter::Setup() { + // Reset the error status in case the adapter gets unplugged + if (current_status < 0) { + current_status = NO_ADAPTER_DETECTED; + } + + adapter_controllers_status.fill(ControllerTypes::None); + + // pointer to list of connected usb devices + libusb_device** devices{}; + + // populate the list of devices, get the count + const ssize_t device_count = libusb_get_device_list(libusb_ctx, &devices); + if (device_count < 0) { + LOG_ERROR(Input, "libusb_get_device_list failed with error: {}", device_count); + detect_thread_running = false; // Stop the loop constantly checking for gc adapter + // TODO: For hotplug+gc adapter checkbox implementation, revert this. + return; + } + + if (devices != nullptr) { + for (std::size_t index = 0; index < device_count; ++index) { + if (CheckDeviceAccess(devices[index])) { + // GC Adapter found and accessible, registering it + GetGCEndpoint(devices[index]); + break; + } + } + libusb_free_device_list(devices, 1); + } +} + +bool Adapter::CheckDeviceAccess(libusb_device* device) { + libusb_device_descriptor desc; + const int get_descriptor_error = libusb_get_device_descriptor(device, &desc); + if (get_descriptor_error) { + // could not acquire the descriptor, no point in trying to use it. + LOG_ERROR(Input, "libusb_get_device_descriptor failed with error: {}", + get_descriptor_error); + return false; + } + + if (desc.idVendor != 0x057e || desc.idProduct != 0x0337) { + // This isn't the device we are looking for. + return false; + } + const int open_error = libusb_open(device, &usb_adapter_handle); + + if (open_error == LIBUSB_ERROR_ACCESS) { + LOG_ERROR(Input, "Yuzu can not gain access to this device: ID {:04X}:{:04X}.", + desc.idVendor, desc.idProduct); + return false; + } + if (open_error) { + LOG_ERROR(Input, "libusb_open failed to open device with error = {}", open_error); + return false; + } + + int kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0); + if (kernel_driver_error == 1) { + kernel_driver_error = libusb_detach_kernel_driver(usb_adapter_handle, 0); + if (kernel_driver_error != 0 && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) { + LOG_ERROR(Input, "libusb_detach_kernel_driver failed with error = {}", + kernel_driver_error); + } + } + + if (kernel_driver_error && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) { + libusb_close(usb_adapter_handle); + usb_adapter_handle = nullptr; + return false; + } + + const int interface_claim_error = libusb_claim_interface(usb_adapter_handle, 0); + if (interface_claim_error) { + LOG_ERROR(Input, "libusb_claim_interface failed with error = {}", interface_claim_error); + libusb_close(usb_adapter_handle); + usb_adapter_handle = nullptr; + return false; + } + + return true; +} + +void Adapter::GetGCEndpoint(libusb_device* device) { + libusb_config_descriptor* config = nullptr; + const int config_descriptor_return = libusb_get_config_descriptor(device, 0, &config); + if (config_descriptor_return != LIBUSB_SUCCESS) { + LOG_ERROR(Input, "libusb_get_config_descriptor failed with error = {}", + config_descriptor_return); + return; + } + + for (u8 ic = 0; ic < config->bNumInterfaces; ic++) { + const libusb_interface* interfaceContainer = &config->interface[ic]; + for (int i = 0; i < interfaceContainer->num_altsetting; i++) { + const libusb_interface_descriptor* interface = &interfaceContainer->altsetting[i]; + for (u8 e = 0; e < interface->bNumEndpoints; e++) { + const libusb_endpoint_descriptor* endpoint = &interface->endpoint[e]; + if (endpoint->bEndpointAddress & LIBUSB_ENDPOINT_IN) { + input_endpoint = endpoint->bEndpointAddress; + } else { + output_endpoint = endpoint->bEndpointAddress; + } + } + } + } + // This transfer seems to be responsible for clearing the state of the adapter + // Used to clear the "busy" state of when the device is unexpectedly unplugged + unsigned char clear_payload = 0x13; + libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, &clear_payload, + sizeof(clear_payload), nullptr, 16); + + adapter_thread_running = true; + current_status = ADAPTER_DETECTED; + adapter_input_thread = std::thread([=] { Read(); }); // Read input +} + +Adapter::~Adapter() { + StopScanThread(); + Reset(); +} + +void Adapter::Reset() { + std::unique_lock<std::mutex> lock(initialization_mutex, std::defer_lock); + if (!lock.try_lock()) { + return; + } + if (current_status != ADAPTER_DETECTED) { + return; + } + + if (adapter_thread_running) { + adapter_thread_running = false; + } + adapter_input_thread.join(); + + adapter_controllers_status.fill(ControllerTypes::None); + current_status = NO_ADAPTER_DETECTED; + + if (usb_adapter_handle) { + libusb_release_interface(usb_adapter_handle, 1); + libusb_close(usb_adapter_handle); + usb_adapter_handle = nullptr; + } + + if (libusb_ctx) { + libusb_exit(libusb_ctx); + } +} + +bool Adapter::DeviceConnected(int port) { + return adapter_controllers_status[port] != ControllerTypes::None; +} + +void Adapter::ResetDeviceType(int port) { + adapter_controllers_status[port] = ControllerTypes::None; +} + +void Adapter::BeginConfiguration() { + for (auto& pq : pad_queue) { + pq.Clear(); + } + configuring = true; +} + +void Adapter::EndConfiguration() { + for (auto& pq : pad_queue) { + pq.Clear(); + } + configuring = false; +} + +std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() { + return pad_queue; +} + +const std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() const { + return pad_queue; +} + +std::array<GCState, 4>& Adapter::GetPadState() { + return state; +} + +const std::array<GCState, 4>& Adapter::GetPadState() const { + return state; +} + +} // namespace GCAdapter diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h new file mode 100644 index 000000000..b1c2a1958 --- /dev/null +++ b/src/input_common/gcadapter/gc_adapter.h @@ -0,0 +1,161 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once +#include <algorithm> +#include <functional> +#include <mutex> +#include <thread> +#include <unordered_map> +#include <libusb.h> +#include "common/common_types.h" +#include "common/threadsafe_queue.h" + +namespace GCAdapter { + +enum { + PAD_USE_ORIGIN = 0x0080, + PAD_GET_ORIGIN = 0x2000, + PAD_ERR_STATUS = 0x8000, +}; + +enum class PadButton { + PAD_BUTTON_LEFT = 0x0001, + PAD_BUTTON_RIGHT = 0x0002, + PAD_BUTTON_DOWN = 0x0004, + PAD_BUTTON_UP = 0x0008, + PAD_TRIGGER_Z = 0x0010, + PAD_TRIGGER_R = 0x0020, + PAD_TRIGGER_L = 0x0040, + PAD_BUTTON_A = 0x0100, + PAD_BUTTON_B = 0x0200, + PAD_BUTTON_X = 0x0400, + PAD_BUTTON_Y = 0x0800, + PAD_BUTTON_START = 0x1000, + // Below is for compatibility with "AxisButton" type + PAD_STICK = 0x2000, +}; + +extern const std::array<PadButton, 12> PadButtonArray; + +enum class PadAxes : u8 { + StickX, + StickY, + SubstickX, + SubstickY, + TriggerLeft, + TriggerRight, + Undefined, +}; + +struct GCPadStatus { + u16 button{}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits + u8 stick_x{}; // 0 <= stick_x <= 255 + u8 stick_y{}; // 0 <= stick_y <= 255 + u8 substick_x{}; // 0 <= substick_x <= 255 + u8 substick_y{}; // 0 <= substick_y <= 255 + u8 trigger_left{}; // 0 <= trigger_left <= 255 + u8 trigger_right{}; // 0 <= trigger_right <= 255 + + static constexpr u8 MAIN_STICK_CENTER_X = 0x80; + static constexpr u8 MAIN_STICK_CENTER_Y = 0x80; + static constexpr u8 MAIN_STICK_RADIUS = 0x7f; + static constexpr u8 C_STICK_CENTER_X = 0x80; + static constexpr u8 C_STICK_CENTER_Y = 0x80; + static constexpr u8 C_STICK_RADIUS = 0x7f; + static constexpr u8 THRESHOLD = 10; + + // 256/4, at least a quarter press to count as a press. For polling mostly + static constexpr u8 TRIGGER_THRESHOLD = 64; + + u8 port{}; + PadAxes axis{PadAxes::Undefined}; + u8 axis_value{255}; +}; + +struct GCState { + std::unordered_map<int, bool> buttons; + std::unordered_map<int, u16> axes; +}; + +enum class ControllerTypes { None, Wired, Wireless }; + +enum { + NO_ADAPTER_DETECTED = 0, + ADAPTER_DETECTED = 1, +}; + +class Adapter { +public: + /// Initialize the GC Adapter capture and read sequence + Adapter(); + + /// Close the adapter read thread and release the adapter + ~Adapter(); + /// Used for polling + void BeginConfiguration(); + void EndConfiguration(); + + std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue(); + const std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue() const; + + std::array<GCState, 4>& GetPadState(); + const std::array<GCState, 4>& GetPadState() const; + +private: + GCPadStatus GetPadStatus(int port, const std::array<u8, 37>& adapter_payload); + + void PadToState(const GCPadStatus& pad, GCState& state); + + void Read(); + void ScanThreadFunc(); + /// Begin scanning for the GC Adapter. + void StartScanThread(); + + /// Stop scanning for the adapter + void StopScanThread(); + + /// Returns true if there is a device connected to port + bool DeviceConnected(int port); + + /// Resets status of device connected to port + void ResetDeviceType(int port); + + /// Returns true if we successfully gain access to GC Adapter + bool CheckDeviceAccess(libusb_device* device); + + /// Captures GC Adapter endpoint address, + void GetGCEndpoint(libusb_device* device); + + /// For shutting down, clear all data, join all threads, release usb + void Reset(); + + /// For use in initialization, querying devices to find the adapter + void Setup(); + + int current_status = NO_ADAPTER_DETECTED; + libusb_device_handle* usb_adapter_handle = nullptr; + std::array<ControllerTypes, 4> adapter_controllers_status{}; + + std::mutex s_mutex; + + std::thread adapter_input_thread; + bool adapter_thread_running; + + std::mutex initialization_mutex; + std::thread detect_thread; + bool detect_thread_running = false; + + libusb_context* libusb_ctx; + + u8 input_endpoint = 0; + u8 output_endpoint = 0; + + bool configuring = false; + + std::array<Common::SPSCQueue<GCPadStatus>, 4> pad_queue; + std::array<GCState, 4> state; +}; + +} // namespace GCAdapter diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp new file mode 100644 index 000000000..385ce8430 --- /dev/null +++ b/src/input_common/gcadapter/gc_poller.cpp @@ -0,0 +1,272 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <atomic> +#include <list> +#include <mutex> +#include <utility> +#include "common/threadsafe_queue.h" +#include "input_common/gcadapter/gc_adapter.h" +#include "input_common/gcadapter/gc_poller.h" + +namespace InputCommon { + +class GCButton final : public Input::ButtonDevice { +public: + explicit GCButton(int port_, int button_, GCAdapter::Adapter* adapter) + : port(port_), button(button_), gcadapter(adapter) {} + + ~GCButton() override; + + bool GetStatus() const override { + return gcadapter->GetPadState()[port].buttons.at(button); + } + +private: + const int port; + const int button; + GCAdapter::Adapter* gcadapter; +}; + +class GCAxisButton final : public Input::ButtonDevice { +public: + explicit GCAxisButton(int port_, int axis_, float threshold_, bool trigger_if_greater_, + GCAdapter::Adapter* adapter) + : port(port_), axis(axis_), threshold(threshold_), trigger_if_greater(trigger_if_greater_), + gcadapter(adapter) { + // L/R triggers range is only in positive direction beginning near 0 + // 0.0 threshold equates to near half trigger press, but threshold accounts for variability. + if (axis > 3) { + threshold *= -0.5; + } + } + + bool GetStatus() const override { + const float axis_value = (gcadapter->GetPadState()[port].axes.at(axis) - 128.0f) / 128.0f; + if (trigger_if_greater) { + // TODO: Might be worthwile to set a slider for the trigger threshold. It is currently + // always set to 0.5 in configure_input_player.cpp ZL/ZR HandleClick + return axis_value > threshold; + } + return axis_value < -threshold; + } + +private: + const int port; + const int axis; + float threshold; + bool trigger_if_greater; + GCAdapter::Adapter* gcadapter; +}; + +GCButtonFactory::GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_) + : adapter(std::move(adapter_)) {} + +GCButton::~GCButton() = default; + +std::unique_ptr<Input::ButtonDevice> GCButtonFactory::Create(const Common::ParamPackage& params) { + const int button_id = params.Get("button", 0); + const int port = params.Get("port", 0); + + constexpr int PAD_STICK_ID = static_cast<u16>(GCAdapter::PadButton::PAD_STICK); + + // button is not an axis/stick button + if (button_id != PAD_STICK_ID) { + auto button = std::make_unique<GCButton>(port, button_id, adapter.get()); + return std::move(button); + } + + // For Axis buttons, used by the binary sticks. + if (button_id == PAD_STICK_ID) { + const int axis = params.Get("axis", 0); + const float threshold = params.Get("threshold", 0.25f); + const std::string direction_name = params.Get("direction", ""); + bool trigger_if_greater; + if (direction_name == "+") { + trigger_if_greater = true; + } else if (direction_name == "-") { + trigger_if_greater = false; + } else { + trigger_if_greater = true; + LOG_ERROR(Input, "Unknown direction {}", direction_name); + } + return std::make_unique<GCAxisButton>(port, axis, threshold, trigger_if_greater, + adapter.get()); + } +} + +Common::ParamPackage GCButtonFactory::GetNextInput() { + Common::ParamPackage params; + GCAdapter::GCPadStatus pad; + auto& queue = adapter->GetPadQueue(); + for (std::size_t port = 0; port < queue.size(); ++port) { + while (queue[port].Pop(pad)) { + // This while loop will break on the earliest detected button + params.Set("engine", "gcpad"); + params.Set("port", static_cast<int>(port)); + for (const auto& button : GCAdapter::PadButtonArray) { + const u16 button_value = static_cast<u16>(button); + if (pad.button & button_value) { + params.Set("button", button_value); + break; + } + } + + // For Axis button implementation + if (pad.axis != GCAdapter::PadAxes::Undefined) { + params.Set("axis", static_cast<u8>(pad.axis)); + params.Set("button", static_cast<u16>(GCAdapter::PadButton::PAD_STICK)); + if (pad.axis_value > 128) { + params.Set("direction", "+"); + params.Set("threshold", "0.25"); + } else { + params.Set("direction", "-"); + params.Set("threshold", "-0.25"); + } + break; + } + } + } + return params; +} + +void GCButtonFactory::BeginConfiguration() { + polling = true; + adapter->BeginConfiguration(); +} + +void GCButtonFactory::EndConfiguration() { + polling = false; + adapter->EndConfiguration(); +} + +class GCAnalog final : public Input::AnalogDevice { +public: + GCAnalog(int port_, int axis_x_, int axis_y_, float deadzone_, GCAdapter::Adapter* adapter) + : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter) {} + + float GetAxis(int axis) const { + std::lock_guard lock{mutex}; + // division is not by a perfect 128 to account for some variance in center location + // e.g. my device idled at 131 in X, 120 in Y, and full range of motion was in range + // [20-230] + return (gcadapter->GetPadState()[port].axes.at(axis) - 128.0f) / 95.0f; + } + + std::pair<float, float> GetAnalog(int axis_x, int axis_y) const { + float x = GetAxis(axis_x); + float y = GetAxis(axis_y); + + // Make sure the coordinates are in the unit circle, + // otherwise normalize it. + float r = x * x + y * y; + if (r > 1.0f) { + r = std::sqrt(r); + x /= r; + y /= r; + } + + return {x, y}; + } + + std::tuple<float, float> GetStatus() const override { + const auto [x, y] = GetAnalog(axis_x, axis_y); + const float r = std::sqrt((x * x) + (y * y)); + if (r > deadzone) { + return {x / r * (r - deadzone) / (1 - deadzone), + y / r * (r - deadzone) / (1 - deadzone)}; + } + return {0.0f, 0.0f}; + } + + bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { + const auto [x, y] = GetStatus(); + const float directional_deadzone = 0.4f; + switch (direction) { + case Input::AnalogDirection::RIGHT: + return x > directional_deadzone; + case Input::AnalogDirection::LEFT: + return x < -directional_deadzone; + case Input::AnalogDirection::UP: + return y > directional_deadzone; + case Input::AnalogDirection::DOWN: + return y < -directional_deadzone; + } + return false; + } + +private: + const int port; + const int axis_x; + const int axis_y; + const float deadzone; + mutable std::mutex mutex; + GCAdapter::Adapter* gcadapter; +}; + +/// An analog device factory that creates analog devices from GC Adapter +GCAnalogFactory::GCAnalogFactory(std::shared_ptr<GCAdapter::Adapter> adapter_) + : adapter(std::move(adapter_)) {} + +/** + * Creates analog device from joystick axes + * @param params contains parameters for creating the device: + * - "port": the nth gcpad on the adapter + * - "axis_x": the index of the axis to be bind as x-axis + * - "axis_y": the index of the axis to be bind as y-axis + */ +std::unique_ptr<Input::AnalogDevice> GCAnalogFactory::Create(const Common::ParamPackage& params) { + const int port = params.Get("port", 0); + const int axis_x = params.Get("axis_x", 0); + const int axis_y = params.Get("axis_y", 1); + const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f); + + return std::make_unique<GCAnalog>(port, axis_x, axis_y, deadzone, adapter.get()); +} + +void GCAnalogFactory::BeginConfiguration() { + polling = true; + adapter->BeginConfiguration(); +} + +void GCAnalogFactory::EndConfiguration() { + polling = false; + adapter->EndConfiguration(); +} + +Common::ParamPackage GCAnalogFactory::GetNextInput() { + GCAdapter::GCPadStatus pad; + auto& queue = adapter->GetPadQueue(); + for (std::size_t port = 0; port < queue.size(); ++port) { + while (queue[port].Pop(pad)) { + if (pad.axis == GCAdapter::PadAxes::Undefined || + std::abs((pad.axis_value - 128.0f) / 128.0f) < 0.1) { + continue; + } + // An analog device needs two axes, so we need to store the axis for later and wait for + // a second input event. The axes also must be from the same joystick. + const u8 axis = static_cast<u8>(pad.axis); + if (analog_x_axis == -1) { + analog_x_axis = axis; + controller_number = port; + } else if (analog_y_axis == -1 && analog_x_axis != axis && controller_number == port) { + analog_y_axis = axis; + } + } + } + Common::ParamPackage params; + if (analog_x_axis != -1 && analog_y_axis != -1) { + params.Set("engine", "gcpad"); + params.Set("port", controller_number); + params.Set("axis_x", analog_x_axis); + params.Set("axis_y", analog_y_axis); + analog_x_axis = -1; + analog_y_axis = -1; + controller_number = -1; + return params; + } + return params; +} + +} // namespace InputCommon diff --git a/src/input_common/gcadapter/gc_poller.h b/src/input_common/gcadapter/gc_poller.h new file mode 100644 index 000000000..e96af7d51 --- /dev/null +++ b/src/input_common/gcadapter/gc_poller.h @@ -0,0 +1,67 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include "core/frontend/input.h" +#include "input_common/gcadapter/gc_adapter.h" + +namespace InputCommon { + +/** + * A button device factory representing a gcpad. It receives gcpad events and forward them + * to all button devices it created. + */ +class GCButtonFactory final : public Input::Factory<Input::ButtonDevice> { +public: + explicit GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_); + + /** + * Creates a button device from a button press + * @param params contains parameters for creating the device: + * - "code": the code of the key to bind with the button + */ + std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override; + + Common::ParamPackage GetNextInput(); + + /// For device input configuration/polling + void BeginConfiguration(); + void EndConfiguration(); + + bool IsPolling() const { + return polling; + } + +private: + std::shared_ptr<GCAdapter::Adapter> adapter; + bool polling = false; +}; + +/// An analog device factory that creates analog devices from GC Adapter +class GCAnalogFactory final : public Input::Factory<Input::AnalogDevice> { +public: + explicit GCAnalogFactory(std::shared_ptr<GCAdapter::Adapter> adapter_); + + std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override; + Common::ParamPackage GetNextInput(); + + /// For device input configuration/polling + void BeginConfiguration(); + void EndConfiguration(); + + bool IsPolling() const { + return polling; + } + +private: + std::shared_ptr<GCAdapter::Adapter> adapter; + int analog_x_axis = -1; + int analog_y_axis = -1; + int controller_number = -1; + bool polling = false; +}; + +} // namespace InputCommon diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp index 078374be5..afb8e6612 100644 --- a/src/input_common/keyboard.cpp +++ b/src/input_common/keyboard.cpp @@ -76,7 +76,7 @@ std::unique_ptr<Input::ButtonDevice> Keyboard::Create(const Common::ParamPackage int key_code = params.Get("code", 0); std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list); key_button_list->AddKeyButton(key_code, button.get()); - return std::move(button); + return button; } void Keyboard::PressKey(int key_code) { diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 95e351e24..fd0af1019 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -4,8 +4,11 @@ #include <memory> #include <thread> +#include <libusb.h> #include "common/param_package.h" #include "input_common/analog_from_button.h" +#include "input_common/gcadapter/gc_adapter.h" +#include "input_common/gcadapter/gc_poller.h" #include "input_common/keyboard.h" #include "input_common/main.h" #include "input_common/motion_emu.h" @@ -22,8 +25,16 @@ static std::shared_ptr<MotionEmu> motion_emu; static std::unique_ptr<SDL::State> sdl; #endif static std::unique_ptr<CemuhookUDP::State> udp; +static std::shared_ptr<GCButtonFactory> gcbuttons; +static std::shared_ptr<GCAnalogFactory> gcanalog; void Init() { + auto gcadapter = std::make_shared<GCAdapter::Adapter>(); + gcbuttons = std::make_shared<GCButtonFactory>(gcadapter); + Input::RegisterFactory<Input::ButtonDevice>("gcpad", gcbuttons); + gcanalog = std::make_shared<GCAnalogFactory>(gcadapter); + Input::RegisterFactory<Input::AnalogDevice>("gcpad", gcanalog); + keyboard = std::make_shared<Keyboard>(); Input::RegisterFactory<Input::ButtonDevice>("keyboard", keyboard); Input::RegisterFactory<Input::AnalogDevice>("analog_from_button", @@ -48,6 +59,11 @@ void Shutdown() { sdl.reset(); #endif udp.reset(); + Input::UnregisterFactory<Input::ButtonDevice>("gcpad"); + Input::UnregisterFactory<Input::AnalogDevice>("gcpad"); + + gcbuttons.reset(); + gcanalog.reset(); } Keyboard* GetKeyboard() { @@ -58,6 +74,14 @@ MotionEmu* GetMotionEmu() { return motion_emu.get(); } +GCButtonFactory* GetGCButtons() { + return gcbuttons.get(); +} + +GCAnalogFactory* GetGCAnalogs() { + return gcanalog.get(); +} + std::string GenerateKeyboardParam(int key_code) { Common::ParamPackage param{ {"engine", "keyboard"}, diff --git a/src/input_common/main.h b/src/input_common/main.h index 77a0ce90b..0e32856f6 100644 --- a/src/input_common/main.h +++ b/src/input_common/main.h @@ -7,6 +7,7 @@ #include <memory> #include <string> #include <vector> +#include "input_common/gcadapter/gc_poller.h" namespace Common { class ParamPackage; @@ -30,6 +31,10 @@ class MotionEmu; /// Gets the motion emulation factory. MotionEmu* GetMotionEmu(); +GCButtonFactory* GetGCButtons(); + +GCAnalogFactory* GetGCAnalogs(); + /// Generates a serialized param package for creating a keyboard button device std::string GenerateKeyboardParam(int key_code); diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp index 868251628..d4cdf76a3 100644 --- a/src/input_common/motion_emu.cpp +++ b/src/input_common/motion_emu.cpp @@ -145,7 +145,7 @@ std::unique_ptr<Input::MotionDevice> MotionEmu::Create(const Common::ParamPackag // Previously created device is disconnected here. Having two motion devices for 3DS is not // expected. current_device = device_wrapper->device; - return std::move(device_wrapper); + return device_wrapper; } void MotionEmu::BeginTilt(int x, int y) { diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index c7038b217..47ef30aa9 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,6 +1,7 @@ add_executable(tests common/bit_field.cpp common/bit_utils.cpp + common/fibers.cpp common/multi_level_queue.cpp common/param_package.cpp common/ring_buffer.cpp diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp new file mode 100644 index 000000000..4fd92428f --- /dev/null +++ b/src/tests/common/fibers.cpp @@ -0,0 +1,358 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <atomic> +#include <cstdlib> +#include <functional> +#include <memory> +#include <thread> +#include <unordered_map> +#include <vector> + +#include <catch2/catch.hpp> +#include <math.h> +#include "common/common_types.h" +#include "common/fiber.h" +#include "common/spin_lock.h" + +namespace Common { + +class TestControl1 { +public: + TestControl1() = default; + + void DoWork(); + + void ExecuteThread(u32 id); + + std::unordered_map<std::thread::id, u32> ids; + std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; + std::vector<std::shared_ptr<Common::Fiber>> work_fibers; + std::vector<u32> items; + std::vector<u32> results; +}; + +static void WorkControl1(void* control) { + auto* test_control = static_cast<TestControl1*>(control); + test_control->DoWork(); +} + +void TestControl1::DoWork() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + u32 value = items[id]; + for (u32 i = 0; i < id; i++) { + value++; + } + results[id] = value; + Fiber::YieldTo(work_fibers[id], thread_fibers[id]); +} + +void TestControl1::ExecuteThread(u32 id) { + std::thread::id this_id = std::this_thread::get_id(); + ids[this_id] = id; + auto thread_fiber = Fiber::ThreadToFiber(); + thread_fibers[id] = thread_fiber; + work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this); + items[id] = rand() % 256; + Fiber::YieldTo(thread_fibers[id], work_fibers[id]); + thread_fibers[id]->Exit(); +} + +static void ThreadStart1(u32 id, TestControl1& test_control) { + test_control.ExecuteThread(id); +} + +/** This test checks for fiber setup configuration and validates that fibers are + * doing all the work required. + */ +TEST_CASE("Fibers::Setup", "[common]") { + constexpr std::size_t num_threads = 7; + TestControl1 test_control{}; + test_control.thread_fibers.resize(num_threads); + test_control.work_fibers.resize(num_threads); + test_control.items.resize(num_threads, 0); + test_control.results.resize(num_threads, 0); + std::vector<std::thread> threads; + for (u32 i = 0; i < num_threads; i++) { + threads.emplace_back(ThreadStart1, i, std::ref(test_control)); + } + for (u32 i = 0; i < num_threads; i++) { + threads[i].join(); + } + for (u32 i = 0; i < num_threads; i++) { + REQUIRE(test_control.items[i] + i == test_control.results[i]); + } +} + +class TestControl2 { +public: + TestControl2() = default; + + void DoWork1() { + trap2 = false; + while (trap.load()) + ; + for (u32 i = 0; i < 12000; i++) { + value1 += i; + } + Fiber::YieldTo(fiber1, fiber3); + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + assert1 = id == 1; + value2 += 5000; + Fiber::YieldTo(fiber1, thread_fibers[id]); + } + + void DoWork2() { + while (trap2.load()) + ; + value2 = 2000; + trap = false; + Fiber::YieldTo(fiber2, fiber1); + assert3 = false; + } + + void DoWork3() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + assert2 = id == 0; + value1 += 1000; + Fiber::YieldTo(fiber3, thread_fibers[id]); + } + + void ExecuteThread(u32 id); + + void CallFiber1() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(thread_fibers[id], fiber1); + } + + void CallFiber2() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(thread_fibers[id], fiber2); + } + + void Exit(); + + bool assert1{}; + bool assert2{}; + bool assert3{true}; + u32 value1{}; + u32 value2{}; + std::atomic<bool> trap{true}; + std::atomic<bool> trap2{true}; + std::unordered_map<std::thread::id, u32> ids; + std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; + std::shared_ptr<Common::Fiber> fiber1; + std::shared_ptr<Common::Fiber> fiber2; + std::shared_ptr<Common::Fiber> fiber3; +}; + +static void WorkControl2_1(void* control) { + auto* test_control = static_cast<TestControl2*>(control); + test_control->DoWork1(); +} + +static void WorkControl2_2(void* control) { + auto* test_control = static_cast<TestControl2*>(control); + test_control->DoWork2(); +} + +static void WorkControl2_3(void* control) { + auto* test_control = static_cast<TestControl2*>(control); + test_control->DoWork3(); +} + +void TestControl2::ExecuteThread(u32 id) { + std::thread::id this_id = std::this_thread::get_id(); + ids[this_id] = id; + auto thread_fiber = Fiber::ThreadToFiber(); + thread_fibers[id] = thread_fiber; +} + +void TestControl2::Exit() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + thread_fibers[id]->Exit(); +} + +static void ThreadStart2_1(u32 id, TestControl2& test_control) { + test_control.ExecuteThread(id); + test_control.CallFiber1(); + test_control.Exit(); +} + +static void ThreadStart2_2(u32 id, TestControl2& test_control) { + test_control.ExecuteThread(id); + test_control.CallFiber2(); + test_control.Exit(); +} + +/** This test checks for fiber thread exchange configuration and validates that fibers are + * that a fiber has been succesfully transfered from one thread to another and that the TLS + * region of the thread is kept while changing fibers. + */ +TEST_CASE("Fibers::InterExchange", "[common]") { + TestControl2 test_control{}; + test_control.thread_fibers.resize(2); + test_control.fiber1 = + std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control); + test_control.fiber2 = + std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control); + test_control.fiber3 = + std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control); + std::thread thread1(ThreadStart2_1, 0, std::ref(test_control)); + std::thread thread2(ThreadStart2_2, 1, std::ref(test_control)); + thread1.join(); + thread2.join(); + REQUIRE(test_control.assert1); + REQUIRE(test_control.assert2); + REQUIRE(test_control.assert3); + REQUIRE(test_control.value2 == 7000); + u32 cal_value = 0; + for (u32 i = 0; i < 12000; i++) { + cal_value += i; + } + cal_value += 1000; + REQUIRE(test_control.value1 == cal_value); +} + +class TestControl3 { +public: + TestControl3() = default; + + void DoWork1() { + value1 += 1; + Fiber::YieldTo(fiber1, fiber2); + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + value3 += 1; + Fiber::YieldTo(fiber1, thread_fibers[id]); + } + + void DoWork2() { + value2 += 1; + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(fiber2, thread_fibers[id]); + } + + void ExecuteThread(u32 id); + + void CallFiber1() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(thread_fibers[id], fiber1); + } + + void Exit(); + + u32 value1{}; + u32 value2{}; + u32 value3{}; + std::unordered_map<std::thread::id, u32> ids; + std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; + std::shared_ptr<Common::Fiber> fiber1; + std::shared_ptr<Common::Fiber> fiber2; +}; + +static void WorkControl3_1(void* control) { + auto* test_control = static_cast<TestControl3*>(control); + test_control->DoWork1(); +} + +static void WorkControl3_2(void* control) { + auto* test_control = static_cast<TestControl3*>(control); + test_control->DoWork2(); +} + +void TestControl3::ExecuteThread(u32 id) { + std::thread::id this_id = std::this_thread::get_id(); + ids[this_id] = id; + auto thread_fiber = Fiber::ThreadToFiber(); + thread_fibers[id] = thread_fiber; +} + +void TestControl3::Exit() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + thread_fibers[id]->Exit(); +} + +static void ThreadStart3(u32 id, TestControl3& test_control) { + test_control.ExecuteThread(id); + test_control.CallFiber1(); + test_control.Exit(); +} + +/** This test checks for one two threads racing for starting the same fiber. + * It checks execution occured in an ordered manner and by no time there were + * two contexts at the same time. + */ +TEST_CASE("Fibers::StartRace", "[common]") { + TestControl3 test_control{}; + test_control.thread_fibers.resize(2); + test_control.fiber1 = + std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control); + test_control.fiber2 = + std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control); + std::thread thread1(ThreadStart3, 0, std::ref(test_control)); + std::thread thread2(ThreadStart3, 1, std::ref(test_control)); + thread1.join(); + thread2.join(); + REQUIRE(test_control.value1 == 1); + REQUIRE(test_control.value2 == 1); + REQUIRE(test_control.value3 == 1); +} + +class TestControl4; + +static void WorkControl4(void* control); + +class TestControl4 { +public: + TestControl4() { + fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this); + goal_reached = false; + rewinded = false; + } + + void Execute() { + thread_fiber = Fiber::ThreadToFiber(); + Fiber::YieldTo(thread_fiber, fiber1); + thread_fiber->Exit(); + } + + void DoWork() { + fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this); + if (rewinded) { + goal_reached = true; + Fiber::YieldTo(fiber1, thread_fiber); + } + rewinded = true; + fiber1->Rewind(); + } + + std::shared_ptr<Common::Fiber> fiber1; + std::shared_ptr<Common::Fiber> thread_fiber; + bool goal_reached; + bool rewinded; +}; + +static void WorkControl4(void* control) { + auto* test_control = static_cast<TestControl4*>(control); + test_control->DoWork(); +} + +TEST_CASE("Fibers::Rewind", "[common]") { + TestControl4 test_control{}; + test_control.Execute(); + REQUIRE(test_control.goal_reached); + REQUIRE(test_control.rewinded); +} + +} // namespace Common diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index ff2d11cc8..e66db1940 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -18,29 +18,26 @@ namespace { // Numbers are chosen randomly to make sure the correct one is given. constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals +constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}}; +std::array<s64, 5> delays{}; std::bitset<CB_IDS.size()> callbacks_ran_flags; u64 expected_callback = 0; -s64 lateness = 0; template <unsigned int IDX> -void CallbackTemplate(u64 userdata, s64 cycles_late) { +void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) { static_assert(IDX < CB_IDS.size(), "IDX out of range"); callbacks_ran_flags.set(IDX); REQUIRE(CB_IDS[IDX] == userdata); - REQUIRE(CB_IDS[IDX] == expected_callback); - REQUIRE(lateness == cycles_late); -} - -u64 callbacks_done = 0; - -void EmptyCallback(u64 userdata, s64 cycles_late) { - ++callbacks_done; + REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]); + delays[IDX] = nanoseconds_late; + ++expected_callback; } struct ScopeInit final { ScopeInit() { - core_timing.Initialize(); + core_timing.SetMulticore(true); + core_timing.Initialize([]() {}); } ~ScopeInit() { core_timing.Shutdown(); @@ -49,110 +46,101 @@ struct ScopeInit final { Core::Timing::CoreTiming core_timing; }; -void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, - int expected_lateness = 0, int cpu_downcount = 0) { - callbacks_ran_flags = 0; - expected_callback = CB_IDS[idx]; - lateness = expected_lateness; - - // Pretend we executed X cycles of instructions. - core_timing.SwitchContext(context); - core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount); - core_timing.Advance(); - core_timing.SwitchContext((context + 1) % 4); +#pragma optimize("", off) - REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); +u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) { + u64 start = core_timing.GetGlobalTimeNs().count(); + u64 placebo = 0; + for (std::size_t i = 0; i < 1000; i++) { + placebo += core_timing.GetGlobalTimeNs().count(); + } + u64 end = core_timing.GetGlobalTimeNs().count(); + return (end - start); } + +#pragma optimize("", on) + } // Anonymous namespace TEST_CASE("CoreTiming[BasicOrder]", "[core]") { ScopeInit guard; auto& core_timing = guard.core_timing; + std::vector<std::shared_ptr<Core::Timing::EventType>> events{ + Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>), + Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>), + Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>), + Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>), + Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>), + }; + + expected_callback = 0; + + core_timing.SyncPause(true); + + u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { + u64 order = calls_order[i]; + core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); + } + /// test pause + REQUIRE(callbacks_ran_flags.none()); - std::shared_ptr<Core::Timing::EventType> cb_a = - Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); - std::shared_ptr<Core::Timing::EventType> cb_b = - Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>); - std::shared_ptr<Core::Timing::EventType> cb_c = - Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>); - std::shared_ptr<Core::Timing::EventType> cb_d = - Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>); - std::shared_ptr<Core::Timing::EventType> cb_e = - Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>); - - // Enter slice 0 - core_timing.ResetRun(); - - // D -> B -> C -> A -> E - core_timing.SwitchContext(0); - core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); - REQUIRE(1000 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); - REQUIRE(100 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); - REQUIRE(100 == core_timing.GetDowncount()); - - AdvanceAndCheck(core_timing, 3, 0); - AdvanceAndCheck(core_timing, 1, 1); - AdvanceAndCheck(core_timing, 2, 2); - AdvanceAndCheck(core_timing, 0, 3); - AdvanceAndCheck(core_timing, 4, 0); -} - -TEST_CASE("CoreTiming[FairSharing]", "[core]") { + core_timing.Pause(false); // No need to sync - ScopeInit guard; - auto& core_timing = guard.core_timing; + while (core_timing.HasPendingEvents()) + ; - std::shared_ptr<Core::Timing::EventType> empty_callback = - Core::Timing::CreateEvent("empty_callback", EmptyCallback); + REQUIRE(callbacks_ran_flags.all()); - callbacks_done = 0; - u64 MAX_CALLBACKS = 10; - for (std::size_t i = 0; i < 10; i++) { - core_timing.ScheduleEvent(i * 3333U, empty_callback, 0); + for (std::size_t i = 0; i < delays.size(); i++) { + const double delay = static_cast<double>(delays[i]); + const double micro = delay / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); } - - const s64 advances = MAX_SLICE_LENGTH / 10; - core_timing.ResetRun(); - u64 current_time = core_timing.GetTicks(); - bool keep_running{}; - do { - keep_running = false; - for (u32 active_core = 0; active_core < 4; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - core_timing.AddTicks(std::min<s64>(advances, core_timing.GetDowncount())); - core_timing.Advance(); - } - keep_running |= core_timing.CanCurrentContextRun(); - } - } while (keep_running); - u64 current_time_2 = core_timing.GetTicks(); - - REQUIRE(MAX_CALLBACKS == callbacks_done); - REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4); } -TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { +TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") { ScopeInit guard; auto& core_timing = guard.core_timing; + std::vector<std::shared_ptr<Core::Timing::EventType>> events{ + Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>), + Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>), + Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>), + Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>), + Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>), + }; + + core_timing.SyncPause(true); + core_timing.SyncPause(false); + + expected_callback = 0; + + u64 start = core_timing.GetGlobalTimeNs().count(); + u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { + u64 order = calls_order[i]; + core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); + } + u64 end = core_timing.GetGlobalTimeNs().count(); + const double scheduling_time = static_cast<double>(end - start); + const double timer_time = static_cast<double>(TestTimerSpeed(core_timing)); - std::shared_ptr<Core::Timing::EventType> cb_a = - Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); - std::shared_ptr<Core::Timing::EventType> cb_b = - Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>); + while (core_timing.HasPendingEvents()) + ; - // Enter slice 0 - core_timing.ResetRun(); + REQUIRE(callbacks_ran_flags.all()); - core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); - core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); + for (std::size_t i = 0; i < delays.size(); i++) { + const double delay = static_cast<double>(delays[i]); + const double micro = delay / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); + } - AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10) - AdvanceAndCheck(core_timing, 1, 1, 50, -50); + const double micro = scheduling_time / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili); + printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, + timer_time / 1000000.f); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d6ee82836..21c46a567 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,8 @@ add_library(video_core STATIC buffer_cache/buffer_cache.h buffer_cache/map_interval.cpp buffer_cache/map_interval.h + compatible_formats.cpp + compatible_formats.h dirty_flags.cpp dirty_flags.h dma_pusher.cpp @@ -25,6 +27,14 @@ add_library(video_core STATIC engines/shader_bytecode.h engines/shader_header.h engines/shader_type.h + macro/macro.cpp + macro/macro.h + macro/macro_hle.cpp + macro/macro_hle.h + macro/macro_interpreter.cpp + macro/macro_interpreter.h + macro/macro_jit_x64.cpp + macro/macro_jit_x64.h fence_manager.h gpu.cpp gpu.h @@ -36,8 +46,6 @@ add_library(video_core STATIC gpu_thread.h guest_driver.cpp guest_driver.h - macro_interpreter.cpp - macro_interpreter.h memory_manager.cpp memory_manager.h morton.cpp @@ -45,11 +53,11 @@ add_library(video_core STATIC query_cache.h rasterizer_accelerated.cpp rasterizer_accelerated.h - rasterizer_cache.cpp - rasterizer_cache.h rasterizer_interface.h renderer_base.cpp renderer_base.h + renderer_opengl/gl_arb_decompiler.cpp + renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp @@ -89,6 +97,7 @@ add_library(video_core STATIC renderer_opengl/utils.h sampler_cache.cpp sampler_cache.h + shader_cache.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index e35ee0b67..e64170e66 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -15,48 +15,47 @@ namespace VideoCommon { class BufferBlock { public: - bool Overlaps(const VAddr start, const VAddr end) const { + bool Overlaps(VAddr start, VAddr end) const { return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const VAddr other_start, const VAddr other_end) const { + bool IsInside(VAddr other_start, VAddr other_end) const { return cpu_addr <= other_start && other_end <= cpu_addr_end; } - std::size_t GetOffset(const VAddr in_addr) { + std::size_t Offset(VAddr in_addr) const { return static_cast<std::size_t>(in_addr - cpu_addr); } - VAddr GetCpuAddr() const { + VAddr CpuAddr() const { return cpu_addr; } - VAddr GetCpuAddrEnd() const { + VAddr CpuAddrEnd() const { return cpu_addr_end; } - void SetCpuAddr(const VAddr new_addr) { + void SetCpuAddr(VAddr new_addr) { cpu_addr = new_addr; cpu_addr_end = new_addr + size; } - std::size_t GetSize() const { + std::size_t Size() const { return size; } - void SetEpoch(u64 new_epoch) { - epoch = new_epoch; + u64 Epoch() const { + return epoch; } - u64 GetEpoch() { - return epoch; + void SetEpoch(u64 new_epoch) { + epoch = new_epoch; } protected: - explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { - SetCpuAddr(cpu_addr); + explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} { + SetCpuAddr(cpu_addr_); } - ~BufferBlock() = default; private: VAddr cpu_addr{}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d9a4a1b4d..dd7ce8c99 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -30,23 +30,31 @@ namespace VideoCommon { -template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> +template <typename Buffer, typename BufferType, typename StreamBuffer> class BufferCache { using IntervalSet = boost::icl::interval_set<VAddr>; using IntervalType = typename IntervalSet::interval_type; using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; + static constexpr u64 WRITE_PAGE_BIT = 11; + static constexpr u64 BLOCK_PAGE_BITS = 21; + static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; + public: - using BufferInfo = std::pair<BufferType, u64>; + struct BufferInfo { + BufferType handle; + u64 offset; + u64 address; + }; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - const auto& memory_manager = system.GPU().MemoryManager(); + auto& memory_manager = system.GPU().MemoryManager(); const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } const VAddr cpu_addr = *cpu_addr_opt; @@ -55,37 +63,41 @@ public: constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { - auto& memory_manager = system.GPU().MemoryManager(); + const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); if (use_fast_cbuf) { - if (memory_manager.IsGranularRange(gpu_addr, size)) { - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - return ConstBufferUpload(host_ptr, size); + u8* dest; + if (is_granular) { + dest = memory_manager.GetPointer(gpu_addr); } else { staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - return ConstBufferUpload(staging_buffer.data(), size); + dest = staging_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); } + return ConstBufferUpload(dest, size); + } + if (is_granular) { + u8* const host_ptr = memory_manager.GetPointer(gpu_addr); + return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { + std::memcpy(dest, host_ptr, size); + }); } else { - if (memory_manager.IsGranularRange(gpu_addr, size)) { - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - return StreamBufferUpload(host_ptr, size, alignment); - } else { - staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - return StreamBufferUpload(staging_buffer.data(), size, alignment); - } + return StreamBufferUpload( + size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { + memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); + }); } } } - OwnerBuffer block = GetBlock(cpu_addr, size); + Buffer* const block = GetBlock(cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); if (!map) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); - if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { + if (Settings::IsGPULevelHigh() && + Settings::values.use_asynchronous_gpu_emulation.GetValue()) { MarkForAsyncFlush(map); } if (!map->is_written) { @@ -94,41 +106,49 @@ public: } } - return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; + return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4) { std::lock_guard lock{mutex}; - return StreamBufferUpload(raw_pointer, size, alignment); + return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { + std::memcpy(dest, raw_pointer, size); + }); } - void Map(std::size_t max_size) { + /// Prepares the buffer cache for data uploading + /// @param max_size Maximum number of bytes that will be uploaded + /// @return True when a stream buffer invalidation was required, false otherwise + bool Map(std::size_t max_size) { std::lock_guard lock{mutex}; + bool invalidated; std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); buffer_offset = buffer_offset_base; + + return invalidated; } - /// Finishes the upload stream, returns true on bindings invalidation. - bool Unmap() { + /// Finishes the upload stream + void Unmap() { std::lock_guard lock{mutex}; - stream_buffer->Unmap(buffer_offset - buffer_offset_base); - return std::exchange(invalidated, false); } + /// Function called at the end of each frame, inteded for deferred operations void TickFrame() { ++epoch; + while (!pending_destruction.empty()) { // Delay at least 4 frames before destruction. // This is due to triple buffering happening on some drivers. static constexpr u64 epochs_to_destroy = 5; - if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { + if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { break; } - pending_destruction.pop_front(); + pending_destruction.pop(); } } @@ -239,28 +259,16 @@ public: committed_flushes.pop_front(); } - virtual BufferType GetEmptyBuffer(std::size_t size) = 0; + virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, std::unique_ptr<StreamBuffer> stream_buffer) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, - stream_buffer_handle{this->stream_buffer->GetHandle()} {} + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} ~BufferCache() = default; - virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; - - virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - - virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, - const u8* data) = 0; - - virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, - u8* data) = 0; - - virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) = 0; + virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { return {}; @@ -315,19 +323,18 @@ protected: } private: - MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, - std::size_t size) { + MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); const VAddr cpu_addr_end = cpu_addr + size; if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); - UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); + block->Upload(block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); + block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } @@ -363,15 +370,15 @@ private: } if (modified_inheritance) { map->MarkAsModified(true, GetModifiedTicks()); - if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { + if (Settings::IsGPULevelHigh() && + Settings::values.use_asynchronous_gpu_emulation.GetValue()) { MarkForAsyncFlush(map); } } return map; } - void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, - const VectorMapInterval& overlaps) { + void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; interval_set.add(base_interval); @@ -380,13 +387,13 @@ private: interval_set.subtract(subtract); } for (auto& interval : interval_set) { - std::size_t size = interval.upper() - interval.lower(); - if (size > 0) { - staging_buffer.resize(size); - system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); - UploadBlockData(block, block->GetOffset(interval.lower()), size, - staging_buffer.data()); + const std::size_t size = interval.upper() - interval.lower(); + if (size == 0) { + continue; } + staging_buffer.resize(size); + system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -416,23 +423,27 @@ private: } void FlushMap(MapInterval* map) { + const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); + ASSERT_OR_EXECUTE(it != blocks.end(), return;); + + std::shared_ptr<Buffer> block = it->second; + const std::size_t size = map->end - map->start; - OwnerBuffer block = blocks[map->start >> block_page_bits]; staging_buffer.resize(size); - DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); + block->Download(block->Offset(map->start), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } - BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, - std::size_t alignment) { + template <typename Callable> + BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { AlignBuffer(alignment); const std::size_t uploaded_offset = buffer_offset; - std::memcpy(buffer_ptr, raw_pointer, size); + callable(buffer_ptr); buffer_ptr += size; buffer_offset += size; - return {stream_buffer_handle, uploaded_offset}; + return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; } void AlignBuffer(std::size_t alignment) { @@ -442,97 +453,89 @@ private: buffer_offset = offset_aligned; } - OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { - const std::size_t old_size = buffer->GetSize(); - const std::size_t new_size = old_size + block_page_size; - const VAddr cpu_addr = buffer->GetCpuAddr(); - OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); - CopyBlock(buffer, new_buffer, 0, 0, old_size); - buffer->SetEpoch(epoch); - pending_destruction.push_back(buffer); + std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) { + const std::size_t old_size = buffer->Size(); + const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; + const VAddr cpu_addr = buffer->CpuAddr(); + std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); + new_buffer->CopyFrom(*buffer, 0, 0, old_size); + QueueDestruction(std::move(buffer)); + const VAddr cpu_addr_end = cpu_addr + new_size - 1; - u64 page_start = cpu_addr >> block_page_bits; - const u64 page_end = cpu_addr_end >> block_page_bits; - while (page_start <= page_end) { - blocks[page_start] = new_buffer; - ++page_start; + const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; + for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { + blocks.insert_or_assign(page_start, new_buffer); } + return new_buffer; } - OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { - const std::size_t size_1 = first->GetSize(); - const std::size_t size_2 = second->GetSize(); - const VAddr first_addr = first->GetCpuAddr(); - const VAddr second_addr = second->GetCpuAddr(); + std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first, + std::shared_ptr<Buffer> second) { + const std::size_t size_1 = first->Size(); + const std::size_t size_2 = second->Size(); + const VAddr first_addr = first->CpuAddr(); + const VAddr second_addr = second->CpuAddr(); const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; - OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); - CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); - CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); - first->SetEpoch(epoch); - second->SetEpoch(epoch); - pending_destruction.push_back(first); - pending_destruction.push_back(second); + + std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); + new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); + new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2); + QueueDestruction(std::move(first)); + QueueDestruction(std::move(second)); + const VAddr cpu_addr_end = new_addr + new_size - 1; - u64 page_start = new_addr >> block_page_bits; - const u64 page_end = cpu_addr_end >> block_page_bits; - while (page_start <= page_end) { - blocks[page_start] = new_buffer; - ++page_start; + const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; + for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { + blocks.insert_or_assign(page_start, new_buffer); } return new_buffer; } - OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { - OwnerBuffer found; + Buffer* GetBlock(VAddr cpu_addr, std::size_t size) { + std::shared_ptr<Buffer> found; + const VAddr cpu_addr_end = cpu_addr + size - 1; - u64 page_start = cpu_addr >> block_page_bits; - const u64 page_end = cpu_addr_end >> block_page_bits; - while (page_start <= page_end) { + const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; + for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { auto it = blocks.find(page_start); if (it == blocks.end()) { if (found) { found = EnlargeBlock(found); - } else { - const VAddr start_addr = (page_start << block_page_bits); - found = CreateBlock(start_addr, block_page_size); - blocks[page_start] = found; - } - } else { - if (found) { - if (found == it->second) { - ++page_start; - continue; - } - found = MergeBlocks(found, it->second); - } else { - found = it->second; + continue; } + const VAddr start_addr = page_start << BLOCK_PAGE_BITS; + found = CreateBlock(start_addr, BLOCK_PAGE_SIZE); + blocks.insert_or_assign(page_start, found); + continue; + } + if (!found) { + found = it->second; + continue; + } + if (found != it->second) { + found = MergeBlocks(std::move(found), it->second); } - ++page_start; } - return found; + return found.get(); } - void MarkRegionAsWritten(const VAddr start, const VAddr end) { - u64 page_start = start >> write_page_bit; - const u64 page_end = end >> write_page_bit; - while (page_start <= page_end) { + void MarkRegionAsWritten(VAddr start, VAddr end) { + const u64 page_end = end >> WRITE_PAGE_BIT; + for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { auto it = written_pages.find(page_start); if (it != written_pages.end()) { it->second = it->second + 1; } else { - written_pages[page_start] = 1; + written_pages.insert_or_assign(page_start, 1); } - ++page_start; } } - void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { - u64 page_start = start >> write_page_bit; - const u64 page_end = end >> write_page_bit; - while (page_start <= page_end) { + void UnmarkRegionAsWritten(VAddr start, VAddr end) { + const u64 page_end = end >> WRITE_PAGE_BIT; + for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { auto it = written_pages.find(page_start); if (it != written_pages.end()) { if (it->second > 1) { @@ -541,22 +544,24 @@ private: written_pages.erase(it); } } - ++page_start; } } - bool IsRegionWritten(const VAddr start, const VAddr end) const { - u64 page_start = start >> write_page_bit; - const u64 page_end = end >> write_page_bit; - while (page_start <= page_end) { + bool IsRegionWritten(VAddr start, VAddr end) const { + const u64 page_end = end >> WRITE_PAGE_BIT; + for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { if (written_pages.count(page_start) > 0) { return true; } - ++page_start; } return false; } + void QueueDestruction(std::shared_ptr<Buffer> buffer) { + buffer->SetEpoch(epoch); + pending_destruction.push(std::move(buffer)); + } + void MarkForAsyncFlush(MapInterval* map) { if (!uncommitted_flushes) { uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); @@ -568,9 +573,7 @@ private: Core::System& system; std::unique_ptr<StreamBuffer> stream_buffer; - BufferType stream_buffer_handle{}; - - bool invalidated = false; + BufferType stream_buffer_handle; u8* buffer_ptr = nullptr; u64 buffer_offset = 0; @@ -580,18 +583,15 @@ private: boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> mapped_addresses; - static constexpr u64 write_page_bit = 11; std::unordered_map<u64, u32> written_pages; + std::unordered_map<u64, std::shared_ptr<Buffer>> blocks; - static constexpr u64 block_page_bits = 21; - static constexpr u64 block_page_size = 1ULL << block_page_bits; - std::unordered_map<u64, OwnerBuffer> blocks; - - std::list<OwnerBuffer> pending_destruction; + std::queue<std::shared_ptr<Buffer>> pending_destruction; u64 epoch = 0; u64 modified_ticks = 0; std::vector<u8> staging_buffer; + std::list<MapInterval*> marked_for_unregister; std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp new file mode 100644 index 000000000..6c426b035 --- /dev/null +++ b/src/video_core/compatible_formats.cpp @@ -0,0 +1,162 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bitset> +#include <cstddef> + +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +namespace { + +// Compatibility table taken from Table 3.X.2 in: +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt + +constexpr std::array VIEW_CLASS_128_BITS = { + PixelFormat::RGBA32F, + PixelFormat::RGBA32UI, +}; +// Missing formats: +// PixelFormat::RGBA32I + +constexpr std::array VIEW_CLASS_96_BITS = { + PixelFormat::RGB32F, +}; +// Missing formats: +// PixelFormat::RGB32UI, +// PixelFormat::RGB32I, + +constexpr std::array VIEW_CLASS_64_BITS = { + PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, + PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S, +}; +// Missing formats: +// PixelFormat::RGBA16I +// PixelFormat::RG32I + +// TODO: How should we handle 48 bits? + +constexpr std::array VIEW_CLASS_32_BITS = { + PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F, + PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI, + PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U, + PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S, + PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8, + PixelFormat::BGRA8_SRGB, +}; +// Missing formats: +// PixelFormat::RGBA8UI +// PixelFormat::RGBA8I +// PixelFormat::RGB10_A2_UI + +// TODO: How should we handle 24 bits? + +constexpr std::array VIEW_CLASS_16_BITS = { + PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I, + PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S, +}; +// Missing formats: +// PixelFormat::RG8I + +constexpr std::array VIEW_CLASS_8_BITS = { + PixelFormat::R8UI, + PixelFormat::R8U, +}; +// Missing formats: +// PixelFormat::R8I +// PixelFormat::R8S + +constexpr std::array VIEW_CLASS_RGTC1_RED = { + PixelFormat::DXN1, +}; +// Missing formats: +// COMPRESSED_SIGNED_RED_RGTC1 + +constexpr std::array VIEW_CLASS_RGTC2_RG = { + PixelFormat::DXN2UNORM, + PixelFormat::DXN2SNORM, +}; + +constexpr std::array VIEW_CLASS_BPTC_UNORM = { + PixelFormat::BC7U, + PixelFormat::BC7U_SRGB, +}; + +constexpr std::array VIEW_CLASS_BPTC_FLOAT = { + PixelFormat::BC6H_SF16, + PixelFormat::BC6H_UF16, +}; + +// Compatibility table taken from Table 4.X.1 in: +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt + +constexpr std::array COPY_CLASS_128_BITS = { + PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23, + PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB, + PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB, + PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16, +}; +// Missing formats: +// PixelFormat::RGBA32I +// COMPRESSED_RG_RGTC2 + +constexpr std::array COPY_CLASS_64_BITS = { + PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, + PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1, + +}; +// Missing formats: +// PixelFormat::RGBA16I +// PixelFormat::RG32I, +// COMPRESSED_RGB_S3TC_DXT1_EXT +// COMPRESSED_SRGB_S3TC_DXT1_EXT +// COMPRESSED_RGBA_S3TC_DXT1_EXT +// COMPRESSED_SIGNED_RED_RGTC1 + +void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { + compatiblity[format_a][format_b] = true; + compatiblity[format_b][format_a] = true; +} + +void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { + Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); +} + +template <typename Range> +void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { + for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { + for (auto it_b = it_a; it_b != range.end(); ++it_b) { + Enable(compatibility, *it_a, *it_b); + } + } +} + +} // Anonymous namespace + +FormatCompatibility::FormatCompatibility() { + for (size_t i = 0; i < MaxPixelFormat; ++i) { + // Identity is allowed + Enable(view, i, i); + } + + EnableRange(view, VIEW_CLASS_128_BITS); + EnableRange(view, VIEW_CLASS_96_BITS); + EnableRange(view, VIEW_CLASS_64_BITS); + EnableRange(view, VIEW_CLASS_32_BITS); + EnableRange(view, VIEW_CLASS_16_BITS); + EnableRange(view, VIEW_CLASS_8_BITS); + EnableRange(view, VIEW_CLASS_RGTC1_RED); + EnableRange(view, VIEW_CLASS_RGTC2_RG); + EnableRange(view, VIEW_CLASS_BPTC_UNORM); + EnableRange(view, VIEW_CLASS_BPTC_FLOAT); + + copy = view; + EnableRange(copy, COPY_CLASS_128_BITS); + EnableRange(copy, COPY_CLASS_64_BITS); +} + +} // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h new file mode 100644 index 000000000..d1082566d --- /dev/null +++ b/src/video_core/compatible_formats.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bitset> +#include <cstddef> + +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +class FormatCompatibility { +public: + using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>; + + explicit FormatCompatibility(); + + bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { + return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; + } + + bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { + return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; + } + +private: + Table view; + Table copy; +}; + +} // namespace VideoCore::Surface diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index ebe139504..f46e81bb7 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -93,6 +93,7 @@ public: virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; virtual u32 GetBoundBuffer() const = 0; virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index f6237fc6a..a82b06a38 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con ASSERT(stage == ShaderType::Compute); const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; + return AccessSampler(memory_manager.Read<u32>(tex_info_address)); +} - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; +SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { + const Texture::TextureHandle tex_handle{handle}; const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 18ceedfaf..b7f668d88 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -219,6 +219,8 @@ public: SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const override; + SamplerDescriptor AccessSampler(u32 handle) const override; + u32 GetBoundBuffer() const override { return regs.tex_cb_index; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 004f6b261..c01436295 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -25,9 +25,8 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, - macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { + macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { dirty.flags.flip(); - InitializeRegisterDefaults(); } @@ -106,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.rasterize_enable = 1; regs.rt_separate_frag_data = 1; regs.framebuffer_srgb = 1; + regs.line_width_aliased = 1.0f; + regs.line_width_smooth = 1.0f; regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; + regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill; + regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; shadow_state = regs; @@ -116,7 +119,7 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } -void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { +void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) { // Reset the current macro. executing_macro = 0; @@ -125,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. - macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); + macro_engine->Execute(*this, macro_positions[entry], parameters); if (mme_draw.current_mode != MMEDrawMode::Undefined) { FlushMMEInlineDraw(); } @@ -161,7 +164,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { // Call the macro when there are no more parameters in the command buffer if (is_last_call) { - CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); + CallMacroMethod(executing_macro, macro_params); macro_params.clear(); } return; @@ -197,7 +200,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { break; } case MAXWELL3D_REG_INDEX(macros.data): { - ProcessMacroUpload(arg); + macro_engine->AddCode(regs.macros.upload_address, arg); break; } case MAXWELL3D_REG_INDEX(macros.bind): { @@ -306,7 +309,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, // Call the macro when there are no more parameters in the command buffer if (amount == methods_pending) { - CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); + CallMacroMethod(executing_macro, macro_params); macro_params.clear(); } return; @@ -420,9 +423,7 @@ void Maxwell3D::FlushMMEInlineDraw() { } void Maxwell3D::ProcessMacroUpload(u32 data) { - ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), - "upload_address exceeded macro_memory size!"); - macro_memory[regs.macros.upload_address++] = data; + macro_engine->AddCode(regs.macros.upload_address++, data); } void Maxwell3D::ProcessMacroBind(u32 data) { @@ -739,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& tex_info_buffer = shader.const_buffers[const_buffer]; const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + return AccessSampler(memory_manager.Read<u32>(tex_info_address)); +} - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; +SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { + const Texture::TextureHandle tex_handle{handle}; const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 05dd6b39b..ef1618990 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -23,7 +23,7 @@ #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" #include "video_core/gpu.h" -#include "video_core/macro_interpreter.h" +#include "video_core/macro/macro.h" #include "video_core/textures/texture.h" namespace Core { @@ -598,6 +598,7 @@ public: BitField<4, 3, u32> block_height; BitField<8, 3, u32> block_depth; BitField<12, 1, InvMemoryLayout> type; + BitField<16, 1, u32> is_3d; } memory_layout; union { BitField<0, 16, u32> layers; @@ -1403,6 +1404,8 @@ public: SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const override; + SamplerDescriptor AccessSampler(u32 handle) const override; + u32 GetBoundBuffer() const override { return regs.tex_cb_index; } @@ -1411,17 +1414,16 @@ public: const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than - /// we've seen used. - using MacroMemory = std::array<u32, 0x40000>; + bool ShouldExecute() const { + return execute_on; + } - /// Gets a reference to macro memory. - const MacroMemory& GetMacroMemory() const { - return macro_memory; + VideoCore::RasterizerInterface& GetRasterizer() { + return rasterizer; } - bool ShouldExecute() const { - return execute_on; + const VideoCore::RasterizerInterface& GetRasterizer() const { + return rasterizer; } /// Notify a memory write has happened. @@ -1468,16 +1470,13 @@ private: std::array<bool, Regs::NUM_REGS> mme_inline{}; - /// Memory for macro code - MacroMemory macro_memory; - /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; /// Parameters that have been submitted to the macro call so far. std::vector<u32> macro_params; /// Interpreter for the macro codes uploaded to the GPU. - MacroInterpreter macro_interpreter; + std::unique_ptr<MacroEngine> macro_engine; static constexpr u32 null_cb_data = 0xFFFFFFFF; struct { @@ -1506,7 +1505,7 @@ private: * @param num_parameters Number of arguments * @param parameters Arguments to the method call */ - void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters); + void CallMacroMethod(u32 method, const std::vector<u32>& parameters); /// Handles writes to the macro uploading register. void ProcessMacroUpload(u32 data); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e7cb87589..d374b73cf 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -661,6 +661,10 @@ union Instruction { constexpr Instruction(u64 value) : value{value} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} + constexpr bool Bit(u64 offset) const { + return ((value >> offset) & 1) != 0; + } + BitField<0, 8, Register> gpr0; BitField<8, 8, Register> gpr8; union { @@ -1874,7 +1878,9 @@ public: HSETP2_C, HSETP2_R, HSETP2_IMM, + HSET2_C, HSET2_R, + HSET2_IMM, POPC_C, POPC_R, POPC_IMM, @@ -2194,7 +2200,9 @@ private: INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), + INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), + INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8eb017f65..758bfe148 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> + #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" @@ -154,9 +156,8 @@ u64 GPU::GetTicks() const { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - const u64 cpu_ticks = system.CoreTiming().GetTicks(); - u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); - if (Settings::values.use_fast_gpu_time) { + u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + if (Settings::values.use_fast_gpu_time.GetValue()) { nanoseconds /= 256; } const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a1b4c305c..2c42483bd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -284,6 +284,12 @@ public: /// core timing events. virtual void Start() = 0; + /// Obtain the CPU Context + virtual void ObtainContext() = 0; + + /// Release the CPU Context + virtual void ReleaseContext() = 0; + /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 53305ab43..7b855f63e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - cpu_context->MakeCurrent(); gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); } +void GPUAsynch::ObtainContext() { + cpu_context->MakeCurrent(); +} + +void GPUAsynch::ReleaseContext() { + cpu_context->DoneCurrent(); +} + void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 517658612..15e9f1d38 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,8 @@ public: ~GPUAsynch() override; void Start() override; + void ObtainContext() override; + void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 6f38a672a..aaeb9811d 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() { +void GPUSynch::Start() {} + +void GPUSynch::ObtainContext() { context->MakeCurrent(); } +void GPUSynch::ReleaseContext() { + context->DoneCurrent(); +} + void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); dma_pusher->DispatchCalls(); diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 4a6e9a01d..762c20aa5 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,8 @@ public: ~GPUSynch() override; void Start() override; + void ObtainContext() override; + void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c3bb4fe06..738c6f0c1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/settings.h" @@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread { static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, SynchState& state) { - MicroProfileOnThreadCreate("GpuThread"); + std::string name = "yuzu:GPU"; + MicroProfileOnThreadCreate(name.c_str()); + Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + system.RegisterHostThread(); // Wait for first GPU command before acquiring the window context while (state.queue.Empty()) diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp new file mode 100644 index 000000000..a50e7b4e0 --- /dev/null +++ b/src/video_core/macro/macro.cpp @@ -0,0 +1,91 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> +#include <boost/container_hash/hash.hpp> +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/macro/macro_interpreter.h" +#include "video_core/macro/macro_jit_x64.h" + +namespace Tegra { + +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) + : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} + +MacroEngine::~MacroEngine() = default; + +void MacroEngine::AddCode(u32 method, u32 data) { + uploaded_macro_code[method].push_back(data); +} + +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, + const std::vector<u32>& parameters) { + auto compiled_macro = macro_cache.find(method); + if (compiled_macro != macro_cache.end()) { + const auto& cache_info = compiled_macro->second; + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } + } else { + // Macro not compiled, check if it's uploaded and if so, compile it + std::optional<u32> mid_method = std::nullopt; + const auto macro_code = uploaded_macro_code.find(method); + if (macro_code == uploaded_macro_code.end()) { + for (const auto& [method_base, code] : uploaded_macro_code) { + if (method >= method_base && (method - method_base) < code.size()) { + mid_method = method_base; + break; + } + } + if (!mid_method.has_value()) { + UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); + return; + } + } + auto& cache_info = macro_cache[method]; + + if (!mid_method.has_value()) { + cache_info.lle_program = Compile(macro_code->second); + cache_info.hash = boost::hash_value(macro_code->second); + } else { + const auto& macro_cached = uploaded_macro_code[mid_method.value()]; + const auto rebased_method = method - mid_method.value(); + auto& code = uploaded_macro_code[method]; + code.resize(macro_cached.size() - rebased_method); + std::memcpy(code.data(), macro_cached.data() + rebased_method, + code.size() * sizeof(u32)); + cache_info.hash = boost::hash_value(code); + cache_info.lle_program = Compile(code); + } + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (hle_program.has_value()) { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program.value()); + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } + } +} + +std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) { + if (Settings::values.disable_macro_jit) { + return std::make_unique<MacroInterpreter>(maxwell3d); + } +#ifdef ARCHITECTURE_x86_64 + return std::make_unique<MacroJITx64>(maxwell3d); +#else + return std::make_unique<MacroInterpreter>(maxwell3d); +#endif +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h new file mode 100644 index 000000000..4d00b84b0 --- /dev/null +++ b/src/video_core/macro/macro.h @@ -0,0 +1,141 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> +#include <vector> +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +namespace Macro { +constexpr std::size_t NUM_MACRO_REGISTERS = 8; +enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, +}; + +enum class ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 +}; + +enum class ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 +}; + +enum class BranchCondition : u32 { + Zero = 0, + NotZero = 1, +}; + +union Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + // If set on a branch, then the branch doesn't have a delay slot. + BitField<5, 1, u32> branch_annul; + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + + s32 GetBranchTarget() const { + return static_cast<s32>(immediate * sizeof(u32)); + } +}; + +union MethodAddress { + u32 raw; + BitField<0, 12, u32> address; + BitField<12, 6, u32> increment; +}; + +} // namespace Macro + +class HLEMacro; + +class CachedMacro { +public: + virtual ~CachedMacro() = default; + /** + * Executes the macro code with the specified input parameters. + * @param code The macro byte code to execute + * @param parameters The parameters of the macro + */ + virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0; +}; + +class MacroEngine { +public: + explicit MacroEngine(Engines::Maxwell3D& maxwell3d); + virtual ~MacroEngine(); + + // Store the uploaded macro code to compile them when they're called. + void AddCode(u32 method, u32 data); + + // Compiles the macro if its not in the cache, and executes the compiled macro + void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); + +protected: + virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; + +private: + struct CacheInfo { + std::unique_ptr<CachedMacro> lle_program{}; + std::unique_ptr<CachedMacro> hle_program{}; + u64 hash{}; + bool has_hle_program{}; + }; + + std::unordered_map<u32, CacheInfo> macro_cache; + std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; + std::unique_ptr<HLEMacro> hle_macros; +}; + +std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..410f99018 --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra { + +namespace { +// HLE'd functions +static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & + ~(0x3ffffff << 26))); + maxwell3d.regs.vb_base_instance = parameters[5]; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.regs.vb_element_base = parameters[3]; + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.index_array.first = parameters[4]; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.index_array.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + maxwell3d.regs.vertex_buffer.first = parameters[3]; + maxwell3d.regs.vertex_buffer.count = parameters[1]; + maxwell3d.regs.vb_base_instance = parameters[4]; + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + maxwell3d.mme_draw.instance_count = count; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(false, true); + } + maxwell3d.regs.vertex_buffer.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.index_array.first = parameters[3]; + maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.vb_element_base = element_base; + maxwell3d.regs.vb_base_instance = base_instance; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, element_base); + maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? + maxwell3d.regs.index_array.count = 0; + maxwell3d.regs.vb_element_base = 0x0; + maxwell3d.regs.vb_base_instance = 0x0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, 0x0); + maxwell3d.CallMethodFromMME(0x8e5, 0x0); + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} +} // namespace + +constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ + std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), + std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), + std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), +}}; + +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::~HLEMacro() = default; + +std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { + const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), + [hash](const auto& pair) { return pair.first == hash; }); + if (it == hle_funcs.end()) { + return std::nullopt; + } + return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); +} + +HLEMacroImpl::~HLEMacroImpl() = default; + +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) + : maxwell3d(maxwell3d), func(func) {} + +void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { + func(maxwell3d, parameters); +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..37af875a0 --- /dev/null +++ b/src/video_core/macro/macro_hle.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> +#include <vector> +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + +class HLEMacro { +public: + explicit HLEMacro(Engines::Maxwell3D& maxwell3d); + ~HLEMacro(); + + std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); + ~HLEMacroImpl(); + + void Execute(const std::vector<u32>& parameters, u32 method) override; + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; + +} // namespace Tegra diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 947364928..aa5256419 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -1,4 +1,4 @@ -// Copyright 2018 yuzu Emulator Project +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -6,109 +6,47 @@ #include "common/logging/log.h" #include "common/microprofile.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/macro_interpreter.h" +#include "video_core/macro/macro_interpreter.h" MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -namespace { -enum class Operation : u32 { - ALU = 0, - AddImmediate = 1, - ExtractInsert = 2, - ExtractShiftLeftImmediate = 3, - ExtractShiftLeftRegister = 4, - Read = 5, - Unused = 6, // This operation doesn't seem to be a valid encoding. - Branch = 7, -}; -} // Anonymous namespace - -enum class MacroInterpreter::ALUOperation : u32 { - Add = 0, - AddWithCarry = 1, - Subtract = 2, - SubtractWithBorrow = 3, - // Operations 4-7 don't seem to be valid encodings. - Xor = 8, - Or = 9, - And = 10, - AndNot = 11, - Nand = 12 -}; - -enum class MacroInterpreter::ResultOperation : u32 { - IgnoreAndFetch = 0, - Move = 1, - MoveAndSetMethod = 2, - FetchAndSend = 3, - MoveAndSend = 4, - FetchAndSetMethod = 5, - MoveAndSetMethodFetchAndSend = 6, - MoveAndSetMethodSend = 7 -}; - -enum class MacroInterpreter::BranchCondition : u32 { - Zero = 0, - NotZero = 1, -}; - -union MacroInterpreter::Opcode { - u32 raw; - BitField<0, 3, Operation> operation; - BitField<4, 3, ResultOperation> result_operation; - BitField<4, 1, BranchCondition> branch_condition; - // If set on a branch, then the branch doesn't have a delay slot. - BitField<5, 1, u32> branch_annul; - BitField<7, 1, u32> is_exit; - BitField<8, 3, u32> dst; - BitField<11, 3, u32> src_a; - BitField<14, 3, u32> src_b; - // The signed immediate overlaps the second source operand and the alu operation. - BitField<14, 18, s32> immediate; - - BitField<17, 5, ALUOperation> alu_operation; - - // Bitfield instructions data - BitField<17, 5, u32> bf_src_bit; - BitField<22, 5, u32> bf_size; - BitField<27, 5, u32> bf_dst_bit; - - u32 GetBitfieldMask() const { - return (1 << bf_size) - 1; - } +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} - s32 GetBranchTarget() const { - return static_cast<s32>(immediate * sizeof(u32)); - } -}; +std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); +} -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& code) + : maxwell3d(maxwell3d), code(code) {} -void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) { +void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) { MICROPROFILE_SCOPE(MacroInterp); Reset(); registers[1] = parameters[0]; + num_parameters = parameters.size(); if (num_parameters > parameters_capacity) { parameters_capacity = num_parameters; this->parameters = std::make_unique<u32[]>(num_parameters); } - std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32)); + std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); this->num_parameters = num_parameters; // Execute the code until we hit an exit condition. bool keep_executing = true; while (keep_executing) { - keep_executing = Step(offset, false); + keep_executing = Step(false); } // Assert the the macro used all the input parameters ASSERT(next_parameter_index == num_parameters); } -void MacroInterpreter::Reset() { +void MacroInterpreterImpl::Reset() { registers = {}; pc = 0; delayed_pc = {}; @@ -120,10 +58,10 @@ void MacroInterpreter::Reset() { carry_flag = false; } -bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { +bool MacroInterpreterImpl::Step(bool is_delay_slot) { u32 base_address = pc; - Opcode opcode = GetOpcode(offset); + Macro::Opcode opcode = GetOpcode(); pc += 4; // Update the program counter if we were delayed @@ -134,18 +72,18 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { } switch (opcode.operation) { - case Operation::ALU: { + case Macro::Operation::ALU: { u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b)); ProcessResult(opcode.result_operation, opcode.dst, result); break; } - case Operation::AddImmediate: { + case Macro::Operation::AddImmediate: { ProcessResult(opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate); break; } - case Operation::ExtractInsert: { + case Macro::Operation::ExtractInsert: { u32 dst = GetRegister(opcode.src_a); u32 src = GetRegister(opcode.src_b); @@ -155,7 +93,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { ProcessResult(opcode.result_operation, opcode.dst, dst); break; } - case Operation::ExtractShiftLeftImmediate: { + case Macro::Operation::ExtractShiftLeftImmediate: { u32 dst = GetRegister(opcode.src_a); u32 src = GetRegister(opcode.src_b); @@ -164,7 +102,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { ProcessResult(opcode.result_operation, opcode.dst, result); break; } - case Operation::ExtractShiftLeftRegister: { + case Macro::Operation::ExtractShiftLeftRegister: { u32 dst = GetRegister(opcode.src_a); u32 src = GetRegister(opcode.src_b); @@ -173,12 +111,12 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { ProcessResult(opcode.result_operation, opcode.dst, result); break; } - case Operation::Read: { + case Macro::Operation::Read: { u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); ProcessResult(opcode.result_operation, opcode.dst, result); break; } - case Operation::Branch: { + case Macro::Operation::Branch: { ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); u32 value = GetRegister(opcode.src_a); bool taken = EvaluateBranchCondition(opcode.branch_condition, value); @@ -191,7 +129,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { delayed_pc = base_address + opcode.GetBranchTarget(); // Execute one more instruction due to the delay slot. - return Step(offset, true); + return Step(true); } break; } @@ -204,51 +142,44 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { // cause an exit if it's executed inside a delay slot. if (opcode.is_exit && !is_delay_slot) { // Exit has a delay slot, execute the next instruction - Step(offset, true); + Step(true); return false; } return true; } -MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { - const auto& macro_memory{maxwell3d.GetMacroMemory()}; - ASSERT((pc % sizeof(u32)) == 0); - ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); - return {macro_memory[offset + pc / sizeof(u32)]}; -} - -u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) { +u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { switch (operation) { - case ALUOperation::Add: { + case Macro::ALUOperation::Add: { const u64 result{static_cast<u64>(src_a) + src_b}; carry_flag = result > 0xffffffff; return static_cast<u32>(result); } - case ALUOperation::AddWithCarry: { + case Macro::ALUOperation::AddWithCarry: { const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; carry_flag = result > 0xffffffff; return static_cast<u32>(result); } - case ALUOperation::Subtract: { + case Macro::ALUOperation::Subtract: { const u64 result{static_cast<u64>(src_a) - src_b}; carry_flag = result < 0x100000000; return static_cast<u32>(result); } - case ALUOperation::SubtractWithBorrow: { + case Macro::ALUOperation::SubtractWithBorrow: { const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; carry_flag = result < 0x100000000; return static_cast<u32>(result); } - case ALUOperation::Xor: + case Macro::ALUOperation::Xor: return src_a ^ src_b; - case ALUOperation::Or: + case Macro::ALUOperation::Or: return src_a | src_b; - case ALUOperation::And: + case Macro::ALUOperation::And: return src_a & src_b; - case ALUOperation::AndNot: + case Macro::ALUOperation::AndNot: return src_a & ~src_b; - case ALUOperation::Nand: + case Macro::ALUOperation::Nand: return ~(src_a & src_b); default: @@ -257,43 +188,43 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) } } -void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) { +void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { switch (operation) { - case ResultOperation::IgnoreAndFetch: + case Macro::ResultOperation::IgnoreAndFetch: // Fetch parameter and ignore result. SetRegister(reg, FetchParameter()); break; - case ResultOperation::Move: + case Macro::ResultOperation::Move: // Move result. SetRegister(reg, result); break; - case ResultOperation::MoveAndSetMethod: + case Macro::ResultOperation::MoveAndSetMethod: // Move result and use as Method Address. SetRegister(reg, result); SetMethodAddress(result); break; - case ResultOperation::FetchAndSend: + case Macro::ResultOperation::FetchAndSend: // Fetch parameter and send result. SetRegister(reg, FetchParameter()); Send(result); break; - case ResultOperation::MoveAndSend: + case Macro::ResultOperation::MoveAndSend: // Move and send result. SetRegister(reg, result); Send(result); break; - case ResultOperation::FetchAndSetMethod: + case Macro::ResultOperation::FetchAndSetMethod: // Fetch parameter and use result as Method Address. SetRegister(reg, FetchParameter()); SetMethodAddress(result); break; - case ResultOperation::MoveAndSetMethodFetchAndSend: + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: // Move result and use as Method Address, then fetch and send parameter. SetRegister(reg, result); SetMethodAddress(result); Send(FetchParameter()); break; - case ResultOperation::MoveAndSetMethodSend: + case Macro::ResultOperation::MoveAndSetMethodSend: // Move result and use as Method Address, then send bits 12:17 of result. SetRegister(reg, result); SetMethodAddress(result); @@ -304,16 +235,28 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res } } -u32 MacroInterpreter::FetchParameter() { - ASSERT(next_parameter_index < num_parameters); - return parameters[next_parameter_index++]; +bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { + switch (cond) { + case Macro::BranchCondition::Zero: + return value == 0; + case Macro::BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); + return true; +} + +Macro::Opcode MacroInterpreterImpl::GetOpcode() const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; } -u32 MacroInterpreter::GetRegister(u32 register_id) const { +u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { return registers.at(register_id); } -void MacroInterpreter::SetRegister(u32 register_id, u32 value) { +void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { // Register 0 is hardwired as the zero register. // Ensure no writes to it actually occur. if (register_id == 0) { @@ -323,30 +266,24 @@ void MacroInterpreter::SetRegister(u32 register_id, u32 value) { registers.at(register_id) = value; } -void MacroInterpreter::SetMethodAddress(u32 address) { +void MacroInterpreterImpl::SetMethodAddress(u32 address) { method_address.raw = address; } -void MacroInterpreter::Send(u32 value) { +void MacroInterpreterImpl::Send(u32 value) { maxwell3d.CallMethodFromMME(method_address.address, value); // Increment the method address by the method increment. method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); } -u32 MacroInterpreter::Read(u32 method) const { +u32 MacroInterpreterImpl::Read(u32 method) const { return maxwell3d.GetRegisterValue(method); } -bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const { - switch (cond) { - case BranchCondition::Zero: - return value == 0; - case BranchCondition::NotZero: - return value != 0; - } - UNREACHABLE(); - return true; +u32 MacroInterpreterImpl::FetchParameter() { + ASSERT(next_parameter_index < num_parameters); + return parameters[next_parameter_index++]; } } // namespace Tegra diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index 631146d89..90217fc89 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -1,44 +1,37 @@ -// Copyright 2018 yuzu Emulator Project +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once - #include <array> #include <optional> - +#include <vector> #include "common/bit_field.h" #include "common/common_types.h" +#include "video_core/macro/macro.h" namespace Tegra { namespace Engines { class Maxwell3D; } -class MacroInterpreter final { +class MacroInterpreter final : public MacroEngine { public: explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); - /** - * Executes the macro code with the specified input parameters. - * @param offset Offset to start execution at. - * @param parameters The parameters of the macro. - */ - void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); +protected: + std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; private: - enum class ALUOperation : u32; - enum class BranchCondition : u32; - enum class ResultOperation : u32; - - union Opcode; + Engines::Maxwell3D& maxwell3d; +}; - union MethodAddress { - u32 raw; - BitField<0, 12, u32> address; - BitField<12, 6, u32> increment; - }; +class MacroInterpreterImpl : public CachedMacro { +public: + MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); + void Execute(const std::vector<u32>& parameters, u32 method) override; +private: /// Resets the execution engine state, zeroing registers, etc. void Reset(); @@ -49,20 +42,20 @@ private: * @param is_delay_slot Whether the current step is being executed due to a delay slot in a * previous instruction. */ - bool Step(u32 offset, bool is_delay_slot); + bool Step(bool is_delay_slot); /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b); + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); /// Performs the result operation on the input result and stores it in the specified register /// (if necessary). - void ProcessResult(ResultOperation operation, u32 reg, u32 result); + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); /// Evaluates the branch condition and returns whether the branch should be taken or not. - bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; /// Reads an opcode at the current program counter location. - Opcode GetOpcode(u32 offset) const; + Macro::Opcode GetOpcode() const; /// Returns the specified register's value. Register 0 is hardcoded to always return 0. u32 GetRegister(u32 register_id) const; @@ -89,13 +82,11 @@ private: /// Program counter to execute at after the delay slot is executed. std::optional<u32> delayed_pc; - static constexpr std::size_t NumMacroRegisters = 8; - /// General purpose macro registers. - std::array<u32, NumMacroRegisters> registers = {}; + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; /// Method address to use for the next Send instruction. - MethodAddress method_address = {}; + Macro::MethodAddress method_address = {}; /// Input parameters of the current macro. std::unique_ptr<u32[]> parameters; @@ -105,5 +96,7 @@ private: u32 next_parameter_index = 0; bool carry_flag = false; + const std::vector<u32>& code; }; + } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp new file mode 100644 index 000000000..07292702f --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -0,0 +1,621 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/x64/xbyak_util.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_interpreter.h" +#include "video_core/macro/macro_jit_x64.h" + +MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47)); +MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); + +namespace Tegra { +static const Xbyak::Reg64 STATE = Xbyak::util::rbx; +static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; +static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; + +static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ + STATE, + RESULT, + PARAMETERS, + METHOD_ADDRESS, + BRANCH_HOLDER, +}); + +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} + +std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +} + +MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code) + : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { + Compile(); +} + +MacroJITx64Impl::~MacroJITx64Impl() = default; + +void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { + MICROPROFILE_SCOPE(MacroJitExecute); + ASSERT_OR_EXECUTE(program != nullptr, { return; }); + JITState state{}; + state.maxwell3d = &maxwell3d; + state.registers = {}; + program(&state, parameters.data()); +} + +void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { + const bool is_a_zero = opcode.src_a == 0; + const bool is_b_zero = opcode.src_b == 0; + const bool valid_operation = !is_a_zero && !is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; + const bool has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; + + Xbyak::Reg32 src_a; + Xbyak::Reg32 src_b; + + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + src_b = Compile_GetRegister(opcode.src_b, eax); + } else { + if (!is_a_zero) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + } + if (!is_b_zero) { + src_b = Compile_GetRegister(opcode.src_b, eax); + } + } + + bool has_emitted = false; + + switch (opcode.alu_operation) { + case Macro::ALUOperation::Add: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + add(src_a, src_b); + } + } else { + add(src_a, src_b); + } + + if (!optimizer.can_skip_carry) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::AddWithCarry: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + adc(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Subtract: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + sub(src_a, src_b); + has_emitted = true; + } + } else { + sub(src_a, src_b); + has_emitted = true; + } + if (!optimizer.can_skip_carry && has_emitted) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::SubtractWithBorrow: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + sbb(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Xor: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + xor_(src_a, src_b); + } + } else { + xor_(src_a, src_b); + } + break; + case Macro::ALUOperation::Or: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + or_(src_a, src_b); + } + } else { + or_(src_a, src_b); + } + break; + case Macro::ALUOperation::And: + if (optimizer.zero_reg_skip) { + if (!has_zero_register) { + and_(src_a, src_b); + } + } else { + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::AndNot: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + not_(src_b); + and_(src_a, src_b); + } + } else { + not_(src_b); + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::Nand: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + and_(src_a, src_b); + not_(src_a); + } + } else { + and_(src_a, src_b); + not_(src_a); + } + break; + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", + static_cast<std::size_t>(opcode.alu_operation.Value())); + break; + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { + if (optimizer.skip_dummy_addimmediate) { + // Games tend to use this as an exit instruction placeholder. It's to encode an instruction + // without doing anything. In our case we can just not emit anything. + if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { + return; + } + } + // Check for redundant moves + if (optimizer.optimize_for_method_move && + opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next_opcode.has_value()) { + const auto next = *next_opcode; + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { + return; + } + } + } + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, RESULT); + auto src = Compile_GetRegister(opcode.src_b, eax); + + if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { + shr(src, opcode.bf_src_bit); + } else if (opcode.bf_src_bit == 31) { + xor_(src, src); + } + // Don't bother masking the whole register since we're using a 32 bit register + if (opcode.bf_size != 31 && opcode.bf_size != 0) { + and_(src, opcode.GetBitfieldMask()); + } else if (opcode.bf_size == 0) { + xor_(src, src); + } + if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { + shl(src, opcode.bf_dst_bit); + } else if (opcode.bf_dst_bit == 31) { + xor_(src, src); + } + + const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + if (mask != 0xffffffff) { + and_(dst, mask); + } + or_(dst, src); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, dst.cvt8()); + if (opcode.bf_size != 0 && opcode.bf_size != 31) { + and_(src, opcode.GetBitfieldMask()); + } else if (opcode.bf_size == 0) { + xor_(src, src); + } + + if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { + shl(src, opcode.bf_dst_bit); + } else if (opcode.bf_dst_bit == 31) { + xor_(src, src); + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); + + if (opcode.bf_src_bit != 0) { + shr(src, opcode.bf_src_bit); + } + + if (opcode.bf_size != 31) { + and_(src, opcode.GetBitfieldMask()); + } + shl(src, dst.cvt8()); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + +void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); + mov(Common::X64::ABI_PARAM3, value); + Common::X64::CallFarFunction(*this, &Send); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + + Xbyak::Label dont_process{}; + // Get increment + test(METHOD_ADDRESS, 0x3f000); + // If zero, method address doesn't update + je(dont_process); + + mov(ecx, METHOD_ADDRESS); + and_(METHOD_ADDRESS, 0xfff); + shr(ecx, 12); + and_(ecx, 0x3f); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); + sal(ecx, 12); + or_(eax, ecx); + + mov(METHOD_ADDRESS, eax); + + L(dont_process); +} + +void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + const s32 jump_address = + static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); + + Xbyak::Label end; + auto value = Compile_GetRegister(opcode.src_a, eax); + test(value, value); + if (optimizer.has_delayed_pc) { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + jne(end, T_NEAR); + break; + case Macro::BranchCondition::NotZero: + je(end, T_NEAR); + break; + } + + if (opcode.branch_annul) { + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } else { + Xbyak::Label handle_post_exit{}; + Xbyak::Label skip{}; + jmp(skip, T_NEAR); + if (opcode.is_exit) { + L(handle_post_exit); + // Execute 1 instruction + mov(BRANCH_HOLDER, end_of_code); + // Jump to next instruction to skip delay slot check + jmp(labels[jump_address], T_NEAR); + } else { + L(handle_post_exit); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } + L(skip); + mov(BRANCH_HOLDER, handle_post_exit); + jmp(delay_skip[pc], T_NEAR); + } + } else { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + je(labels[jump_address], T_NEAR); + break; + case Macro::BranchCondition::NotZero: + jne(labels[jump_address], T_NEAR); + break; + } + } + + L(end); +} + +void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { + optimizer.can_skip_carry = true; + optimizer.has_delayed_pc = false; + for (auto raw_op : code) { + Macro::Opcode op{}; + op.raw = raw_op; + + if (op.operation == Macro::Operation::ALU) { + // Scan for any ALU operations which actually use the carry flag, if they don't exist in + // our current code we can skip emitting the carry flag handling operations + if (op.alu_operation == Macro::ALUOperation::AddWithCarry || + op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { + optimizer.can_skip_carry = false; + } + } + + if (op.operation == Macro::Operation::Branch) { + if (!op.branch_annul) { + optimizer.has_delayed_pc = true; + } + } + } +} + +void MacroJITx64Impl::Compile() { + MICROPROFILE_SCOPE(MacroJitCompile); + bool keep_executing = true; + labels.fill(Xbyak::Label()); + + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + // JIT state + mov(STATE, Common::X64::ABI_PARAM1); + mov(PARAMETERS, Common::X64::ABI_PARAM2); + xor_(RESULT, RESULT); + xor_(METHOD_ADDRESS, METHOD_ADDRESS); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); + + // Track get register for zero registers and mark it as no-op + optimizer.zero_reg_skip = true; + + // AddImmediate tends to be used as a NOP instruction, if we detect this we can + // completely skip the entire code path and no emit anything + optimizer.skip_dummy_addimmediate = true; + + // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting + // one if our register isn't "dirty" + optimizer.optimize_for_method_move = true; + + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + + // Check to see if we can skip emitting certain instructions + Optimizer_ScanFlags(); + + const u32 op_count = static_cast<u32>(code.size()); + for (u32 i = 0; i < op_count; i++) { + if (i < op_count - 1) { + pc = i + 1; + next_opcode = GetOpCode(); + } else { + next_opcode = {}; + } + pc = i; + Compile_NextInstruction(); + } + + L(end_of_code); + + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + ret(); + ready(); + program = getCode<ProgramType>(); +} + +bool MacroJITx64Impl::Compile_NextInstruction() { + const auto opcode = GetOpCode(); + if (labels[pc].getAddress()) { + return false; + } + + L(labels[pc]); + + switch (opcode.operation) { + case Macro::Operation::ALU: + Compile_ALU(opcode); + break; + case Macro::Operation::AddImmediate: + Compile_AddImmediate(opcode); + break; + case Macro::Operation::ExtractInsert: + Compile_ExtractInsert(opcode); + break; + case Macro::Operation::ExtractShiftLeftImmediate: + Compile_ExtractShiftLeftImmediate(opcode); + break; + case Macro::Operation::ExtractShiftLeftRegister: + Compile_ExtractShiftLeftRegister(opcode); + break; + case Macro::Operation::Read: + Compile_Read(opcode); + break; + case Macro::Operation::Branch: + Compile_Branch(opcode); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); + break; + } + + if (optimizer.has_delayed_pc) { + if (opcode.is_exit) { + mov(rax, end_of_code); + test(BRANCH_HOLDER, BRANCH_HOLDER); + cmove(BRANCH_HOLDER, rax); + // Jump to next instruction to skip delay slot check + je(labels[pc + 1], T_NEAR); + } else { + // TODO(ogniK): Optimize delay slot branching + Xbyak::Label no_delay_slot{}; + test(BRANCH_HOLDER, BRANCH_HOLDER); + je(no_delay_slot, T_NEAR); + mov(rax, BRANCH_HOLDER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(rax); + L(no_delay_slot); + } + L(delay_skip[pc]); + if (opcode.is_exit) { + return false; + } + } else { + test(BRANCH_HOLDER, BRANCH_HOLDER); + jne(end_of_code, T_NEAR); + if (opcode.is_exit) { + inc(BRANCH_HOLDER); + return false; + } + } + return true; +} + +Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); + return eax; +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); + } + + return dst; +} + +void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { + const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. + if (reg == 0) { + return; + } + mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); + }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; + + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + SetRegister(reg, Compile_FetchParameter()); + break; + case Macro::ResultOperation::Move: + SetRegister(reg, RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethod: + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, Compile_FetchParameter()); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, RESULT); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, Compile_FetchParameter()); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + Compile_Send(Compile_FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + shr(RESULT, 12); + and_(RESULT, 0b111111); + Compile_Send(RESULT); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation)); + } +} + +Macro::Opcode MacroJITx64Impl::GetOpCode() const { + ASSERT(pc < code.size()); + return {code[pc]}; +} + +std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h new file mode 100644 index 000000000..a180e7428 --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.h @@ -0,0 +1,98 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <bitset> +#include <xbyak.h> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "common/x64/xbyak_abi.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games +constexpr size_t MAX_CODE_SIZE = 0x10000; + +class MacroJITx64 final : public MacroEngine { +public: + explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); + +protected: + std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { +public: + MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); + ~MacroJITx64Impl(); + + void Execute(const std::vector<u32>& parameters, u32 method) override; + + void Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); + + Macro::Opcode GetOpCode() const; + std::bitset<32> PersistentCallerSavedRegs() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional<Macro::Opcode> next_opcode{}; + ProgramType program{nullptr}; + + std::array<Xbyak::Label, MAX_CODE_SIZE> labels; + std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + std::optional<u32> delayed_pc; + + const std::vector<u32>& code; + Engines::Maxwell3D& maxwell3d; +}; + +} // namespace Tegra diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index dbee9f634..ff5505d12 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si return range == inner_size; } -void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { +void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, + const std::size_t size) const { std::size_t remaining_size{size}; - std::size_t page_index{src_addr >> page_bits}; - std::size_t page_offset{src_addr & page_mask}; + std::size_t page_index{gpu_src_addr >> page_bits}; + std::size_t page_offset{gpu_src_addr & page_mask}; auto& memory = system.Memory(); @@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s } } -void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, +void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { std::size_t remaining_size{size}; - std::size_t page_index{src_addr >> page_bits}; - std::size_t page_offset{src_addr & page_mask}; + std::size_t page_index{gpu_src_addr >> page_bits}; + std::size_t page_offset{gpu_src_addr & page_mask}; auto& memory = system.Memory(); @@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, } } -void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { +void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, + const std::size_t size) { std::size_t remaining_size{size}; - std::size_t page_index{dest_addr >> page_bits}; - std::size_t page_offset{dest_addr & page_mask}; + std::size_t page_index{gpu_dest_addr >> page_bits}; + std::size_t page_offset{gpu_dest_addr & page_mask}; auto& memory = system.Memory(); @@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const } } -void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, +void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, const std::size_t size) { std::size_t remaining_size{size}; - std::size_t page_index{dest_addr >> page_bits}; - std::size_t page_offset{dest_addr & page_mask}; + std::size_t page_index{gpu_dest_addr >> page_bits}; + std::size_t page_offset{gpu_dest_addr & page_mask}; auto& memory = system.Memory(); @@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, } } -void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, + const std::size_t size) { std::vector<u8> tmp_buffer(size); - ReadBlock(src_addr, tmp_buffer.data(), size); - WriteBlock(dest_addr, tmp_buffer.data(), size); + ReadBlock(gpu_src_addr, tmp_buffer.data(), size); + WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); } -void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { +void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, + const std::size_t size) { std::vector<u8> tmp_buffer(size); - ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); - WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); + ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size); + WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size); } bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0ddd52d5a..87658e87a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -79,9 +79,9 @@ public: * in the Host Memory counterpart. Note: This functions cause Host GPU Memory * Flushes and Invalidations, respectively to each operation. */ - void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); - void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; + void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); /** * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and @@ -93,9 +93,9 @@ public: * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture * being flushed. */ - void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); - void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; + void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); /** * IsGranularRange checks if a gpu region can be simply read with a pointer diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 2f75f8801..0d3a88765 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -132,7 +132,7 @@ public: } query->BindCounter(Stream(type).Current(), timestamp); - if (Settings::values.use_asynchronous_gpu_emulation) { + if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { AsyncFlushQuery(cpu_addr); } } @@ -220,8 +220,8 @@ private: return cache_begin < addr_end && addr_begin < cache_end; }; - const u64 page_end = addr_end >> PAGE_SHIFT; - for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const u64 page_end = addr_end >> PAGE_BITS; + for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) { const auto& it = cached_queries.find(page); if (it == std::end(cached_queries)) { continue; @@ -242,14 +242,14 @@ private: /// Registers the passed parameters as cached and returns a pointer to the stored cached query. CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); - const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; + const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS; return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, host_ptr); } /// Tries to a get a cached query. Returns nullptr on failure. CachedQuery* TryGet(VAddr addr) { - const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; + const u64 page = static_cast<u64>(addr) >> PAGE_BITS; const auto it = cached_queries.find(page); if (it == std::end(cached_queries)) { return nullptr; @@ -268,7 +268,7 @@ private: } static constexpr std::uintptr_t PAGE_SIZE = 4096; - static constexpr unsigned PAGE_SHIFT = 12; + static constexpr unsigned PAGE_BITS = 12; Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp deleted file mode 100644 index 093b2cdf4..000000000 --- a/src/video_core/rasterizer_cache.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/rasterizer_cache.h" - -RasterizerCacheObject::~RasterizerCacheObject() = default; diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h deleted file mode 100644 index 096ee337c..000000000 --- a/src/video_core/rasterizer_cache.h +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <mutex> -#include <set> -#include <unordered_map> - -#include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range_core.hpp> - -#include "common/common_types.h" -#include "core/settings.h" -#include "video_core/gpu.h" -#include "video_core/rasterizer_interface.h" - -class RasterizerCacheObject { -public: - explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} - - virtual ~RasterizerCacheObject(); - - VAddr GetCpuAddr() const { - return cpu_addr; - } - - /// Gets the size of the shader in guest memory, required for cache management - virtual std::size_t GetSizeInBytes() const = 0; - - /// Sets whether the cached object should be considered registered - void SetIsRegistered(bool registered) { - is_registered = registered; - } - - /// Returns true if the cached object is registered - bool IsRegistered() const { - return is_registered; - } - - /// Returns true if the cached object is dirty - bool IsDirty() const { - return is_dirty; - } - - /// Returns ticks from when this cached object was last modified - u64 GetLastModifiedTicks() const { - return last_modified_ticks; - } - - /// Marks an object as recently modified, used to specify whether it is clean or dirty - template <class T> - void MarkAsModified(bool dirty, T& cache) { - is_dirty = dirty; - last_modified_ticks = cache.GetModifiedTicks(); - } - - void SetMemoryMarked(bool is_memory_marked_) { - is_memory_marked = is_memory_marked_; - } - - bool IsMemoryMarked() const { - return is_memory_marked; - } - - void SetSyncPending(bool is_sync_pending_) { - is_sync_pending = is_sync_pending_; - } - - bool IsSyncPending() const { - return is_sync_pending; - } - -private: - bool is_registered{}; ///< Whether the object is currently registered with the cache - bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) - bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory. - bool is_sync_pending{}; ///< Whether the object is pending deletion. - u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing - VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space -}; - -template <class T> -class RasterizerCache : NonCopyable { - friend class RasterizerCacheObject; - -public: - explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} - - /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; - - const auto& objects{GetSortedObjectsFromRegion(addr, size)}; - for (auto& object : objects) { - FlushObject(object); - } - } - - /// Mark the specified region as being invalidated - void InvalidateRegion(VAddr addr, u64 size) { - std::lock_guard lock{mutex}; - - const auto& objects{GetSortedObjectsFromRegion(addr, size)}; - for (auto& object : objects) { - if (!object->IsRegistered()) { - // Skip duplicates - continue; - } - Unregister(object); - } - } - - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; - - for (const auto& object : GetSortedObjectsFromRegion(addr, size)) { - if (object->IsRegistered()) { - UnmarkMemory(object); - object->SetSyncPending(true); - marked_for_unregister.emplace_back(object); - } - } - } - - void SyncGuestHost() { - std::lock_guard lock{mutex}; - - for (const auto& object : marked_for_unregister) { - if (object->IsRegistered()) { - object->SetSyncPending(false); - Unregister(object); - } - } - marked_for_unregister.clear(); - } - - /// Invalidates everything in the cache - void InvalidateAll() { - std::lock_guard lock{mutex}; - - while (interval_cache.begin() != interval_cache.end()) { - Unregister(*interval_cache.begin()->second.begin()); - } - } - -protected: - /// Tries to get an object from the cache with the specified cache address - T TryGet(VAddr addr) const { - const auto iter = map_cache.find(addr); - if (iter != map_cache.end()) - return iter->second; - return nullptr; - } - - /// Register an object into the cache - virtual void Register(const T& object) { - std::lock_guard lock{mutex}; - - object->SetIsRegistered(true); - interval_cache.add({GetInterval(object), ObjectSet{object}}); - map_cache.insert({object->GetCpuAddr(), object}); - rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); - object->SetMemoryMarked(true); - } - - /// Unregisters an object from the cache - virtual void Unregister(const T& object) { - std::lock_guard lock{mutex}; - - UnmarkMemory(object); - object->SetIsRegistered(false); - if (object->IsSyncPending()) { - marked_for_unregister.remove(object); - object->SetSyncPending(false); - } - const VAddr addr = object->GetCpuAddr(); - interval_cache.subtract({GetInterval(object), ObjectSet{object}}); - map_cache.erase(addr); - } - - void UnmarkMemory(const T& object) { - if (!object->IsMemoryMarked()) { - return; - } - rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); - object->SetMemoryMarked(false); - } - - /// Returns a ticks counter used for tracking when cached objects were last modified - u64 GetModifiedTicks() { - std::lock_guard lock{mutex}; - - return ++modified_ticks; - } - - virtual void FlushObjectInner(const T& object) = 0; - - /// Flushes the specified object, updating appropriate cache state as needed - void FlushObject(const T& object) { - std::lock_guard lock{mutex}; - - if (!object->IsDirty()) { - return; - } - FlushObjectInner(object); - object->MarkAsModified(false, *this); - } - - std::recursive_mutex mutex; - -private: - /// Returns a list of cached objects from the specified memory region, ordered by access time - std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { - if (size == 0) { - return {}; - } - - std::vector<T> objects; - const ObjectInterval interval{addr, addr + size}; - for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) { - for (auto& cached_object : pair.second) { - if (!cached_object) { - continue; - } - objects.push_back(cached_object); - } - } - - std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool { - return a->GetLastModifiedTicks() < b->GetLastModifiedTicks(); - }); - - return objects; - } - - using ObjectSet = std::set<T>; - using ObjectCache = std::unordered_map<VAddr, T>; - using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; - using ObjectInterval = typename IntervalCache::interval_type; - - static auto GetInterval(const T& object) { - return ObjectInterval::right_open(object->GetCpuAddr(), - object->GetCpuAddr() + object->GetSizeInBytes()); - } - - ObjectCache map_cache; - IntervalCache interval_cache; ///< Cache of objects - u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing - VideoCore::RasterizerInterface& rasterizer; - std::list<T> marked_for_unregister; -}; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 919d1f2d4..dfb06e87e 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -18,7 +18,7 @@ RendererBase::~RendererBase() = default; void RendererBase::RefreshBaseSettings() { UpdateCurrentFramebufferLayout(); - renderer_settings.use_framelimiter = Settings::values.use_frame_limit; + renderer_settings.use_framelimiter = Settings::values.use_frame_limit.GetValue(); renderer_settings.set_background_color = true; } diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp new file mode 100644 index 000000000..eb5158407 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -0,0 +1,2073 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <cstddef> +#include <string> +#include <string_view> +#include <utility> +#include <variant> + +#include <fmt/format.h> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_arb_decompiler.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/shader/registry.h" +#include "video_core/shader/shader_ir.h" + +// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. +// GLASM lacks booleans, so they have to be implemented as integers. +// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to +// select between two values, because -1 will be evaluated as true and 0 as false. + +namespace OpenGL { + +namespace { + +using Tegra::Engines::ShaderType; +using Tegra::Shader::Attribute; +using Tegra::Shader::PixelImap; +using Tegra::Shader::Register; +using namespace VideoCommon::Shader; +using Operation = const OperationNode&; + +constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; + +char Swizzle(std::size_t component) { + ASSERT(component < 4); + return component["xyzw"]; +} + +constexpr bool IsGenericAttribute(Attribute::Index index) { + return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; +} + +u32 GetGenericAttributeIndex(Attribute::Index index) { + ASSERT(IsGenericAttribute(index)); + return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); +} + +std::string_view Modifiers(Operation operation) { + const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); + if (meta && meta->precise) { + return ".PREC"; + } + return ""; +} + +std::string_view GetInputFlags(PixelImap attribute) { + switch (attribute) { + case PixelImap::Perspective: + return ""; + case PixelImap::Constant: + return "FLAT "; + case PixelImap::ScreenLinear: + return "NOPERSPECTIVE "; + case PixelImap::Unused: + break; + } + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); + return {}; +} + +std::string_view ImageType(Tegra::Shader::ImageType image_type) { + switch (image_type) { + case Tegra::Shader::ImageType::Texture1D: + return "1D"; + case Tegra::Shader::ImageType::TextureBuffer: + return "BUFFER"; + case Tegra::Shader::ImageType::Texture1DArray: + return "ARRAY1D"; + case Tegra::Shader::ImageType::Texture2D: + return "2D"; + case Tegra::Shader::ImageType::Texture2DArray: + return "ARRAY2D"; + case Tegra::Shader::ImageType::Texture3D: + return "3D"; + } + UNREACHABLE(); + return {}; +} + +std::string_view StackName(MetaStackClass stack) { + switch (stack) { + case MetaStackClass::Ssy: + return "SSY"; + case MetaStackClass::Pbk: + return "PBK"; + } + UNREACHABLE(); + return ""; +}; + +std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { + switch (topology) { + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: + return "POINTS"; + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: + return "LINES"; + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + return "LINES_ADJACENCY"; + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + return "TRIANGLES"; + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + return "TRIANGLES_ADJACENCY"; + default: + UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); + return "POINTS"; + } +} + +std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { + switch (topology) { + case Tegra::Shader::OutputTopology::PointList: + return "POINTS"; + case Tegra::Shader::OutputTopology::LineStrip: + return "LINE_STRIP"; + case Tegra::Shader::OutputTopology::TriangleStrip: + return "TRIANGLE_STRIP"; + default: + UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); + return "points"; + } +} + +std::string_view StageInputName(ShaderType stage) { + switch (stage) { + case ShaderType::Vertex: + case ShaderType::Geometry: + return "vertex"; + case ShaderType::Fragment: + return "fragment"; + case ShaderType::Compute: + return "invocation"; + default: + UNREACHABLE(); + return ""; + } +} + +std::string TextureType(const MetaTexture& meta) { + if (meta.sampler.is_buffer) { + return "BUFFER"; + } + std::string type; + if (meta.sampler.is_shadow) { + type += "SHADOW"; + } + if (meta.sampler.is_array) { + type += "ARRAY"; + } + type += [&meta] { + switch (meta.sampler.type) { + case Tegra::Shader::TextureType::Texture1D: + return "1D"; + case Tegra::Shader::TextureType::Texture2D: + return "2D"; + case Tegra::Shader::TextureType::Texture3D: + return "3D"; + case Tegra::Shader::TextureType::TextureCube: + return "CUBE"; + } + UNREACHABLE(); + return "2D"; + }(); + return type; +} + +std::string GlobalMemoryName(const GlobalMemoryBase& base) { + return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); +} + +class ARBDecompiler final { +public: + explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view identifier); + + std::string Code() const { + return shader_source; + } + +private: + void DeclareHeader(); + void DeclareVertex(); + void DeclareGeometry(); + void DeclareFragment(); + void DeclareCompute(); + void DeclareInputAttributes(); + void DeclareOutputAttributes(); + void DeclareLocalMemory(); + void DeclareGlobalMemory(); + void DeclareConstantBuffers(); + void DeclareRegisters(); + void DeclareTemporaries(); + void DeclarePredicates(); + void DeclareInternalFlags(); + + void InitializeVariables(); + + void DecompileAST(); + void DecompileBranchMode(); + + void VisitAST(const ASTNode& node); + std::string VisitExpression(const Expr& node); + + void VisitBlock(const NodeBlock& bb); + + std::string Visit(const Node& node); + + std::pair<std::string, std::size_t> BuildCoords(Operation); + std::string BuildAoffi(Operation); + void Exit(); + + std::string Assign(Operation); + std::string Select(Operation); + std::string FClamp(Operation); + std::string FCastHalf0(Operation); + std::string FCastHalf1(Operation); + std::string FSqrt(Operation); + std::string FSwizzleAdd(Operation); + std::string HAdd2(Operation); + std::string HMul2(Operation); + std::string HFma2(Operation); + std::string HAbsolute(Operation); + std::string HNegate(Operation); + std::string HClamp(Operation); + std::string HCastFloat(Operation); + std::string HUnpack(Operation); + std::string HMergeF32(Operation); + std::string HMergeH0(Operation); + std::string HMergeH1(Operation); + std::string HPack2(Operation); + std::string LogicalAssign(Operation); + std::string LogicalPick2(Operation); + std::string LogicalAnd2(Operation); + std::string FloatOrdered(Operation); + std::string FloatUnordered(Operation); + std::string LogicalAddCarry(Operation); + std::string Texture(Operation); + std::string TextureGather(Operation); + std::string TextureQueryDimensions(Operation); + std::string TextureQueryLod(Operation); + std::string TexelFetch(Operation); + std::string TextureGradient(Operation); + std::string ImageLoad(Operation); + std::string ImageStore(Operation); + std::string Branch(Operation); + std::string BranchIndirect(Operation); + std::string PushFlowStack(Operation); + std::string PopFlowStack(Operation); + std::string Exit(Operation); + std::string Discard(Operation); + std::string EmitVertex(Operation); + std::string EndPrimitive(Operation); + std::string InvocationId(Operation); + std::string YNegate(Operation); + std::string ThreadId(Operation); + std::string ShuffleIndexed(Operation); + std::string Barrier(Operation); + std::string MemoryBarrierGroup(Operation); + std::string MemoryBarrierGlobal(Operation); + + template <const std::string_view& op> + std::string Unary(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); + return temporary; + } + + template <const std::string_view& op> + std::string Binary(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), + Visit(operation[1])); + return temporary; + } + + template <const std::string_view& op> + std::string Trinary(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), + Visit(operation[1]), Visit(operation[2])); + return temporary; + } + + template <const std::string_view& op, bool unordered> + std::string FloatComparison(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); + AddLine("MOV.S {}, 0;", temporary); + AddLine("MOV.S {} (NE.x), -1;", temporary); + + const std::string op_a = Visit(operation[0]); + const std::string op_b = Visit(operation[1]); + if constexpr (unordered) { + AddLine("SNE.F RC.x, {}, {};", op_a, op_a); + AddLine("TRUNC.U.CC RC.x, RC.x;"); + AddLine("MOV.S {} (NE.x), -1;", temporary); + AddLine("SNE.F RC.x, {}, {};", op_b, op_b); + AddLine("TRUNC.U.CC RC.x, RC.x;"); + AddLine("MOV.S {} (NE.x), -1;", temporary); + } else if (op == SNE_F) { + AddLine("SNE.F RC.x, {}, {};", op_a, op_a); + AddLine("TRUNC.U.CC RC.x, RC.x;"); + AddLine("MOV.S {} (NE.x), 0;", temporary); + AddLine("SNE.F RC.x, {}, {};", op_b, op_b); + AddLine("TRUNC.U.CC RC.x, RC.x;"); + AddLine("MOV.S {} (NE.x), 0;", temporary); + } + return temporary; + } + + template <const std::string_view& op, bool is_nan> + std::string HalfComparison(Operation operation) { + std::string tmp1 = AllocVectorTemporary(); + const std::string tmp2 = AllocVectorTemporary(); + const std::string op_a = Visit(operation[0]); + const std::string op_b = Visit(operation[1]); + AddLine("UP2H.F {}, {};", tmp1, op_a); + AddLine("UP2H.F {}, {};", tmp2, op_b); + AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); + AddLine("TRUNC.U.CC RC.xy, {};", tmp1); + AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); + AddLine("MOV.S {}.x (NE.x), -1;", tmp1); + AddLine("MOV.S {}.y (NE.y), -1;", tmp1); + if constexpr (is_nan) { + AddLine("MOVC.F RC.x, {};", op_a); + AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); + AddLine("MOVC.F RC.x, {};", op_b); + AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); + } + return tmp1; + } + + template <const std::string_view& op, const std::string_view& type> + std::string AtomicImage(Operation operation) { + const auto& meta = std::get<MetaImage>(operation.GetMeta()); + const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; + const std::size_t num_coords = operation.GetOperandsCount(); + const std::size_t num_values = meta.values.size(); + + const std::string coord = AllocVectorTemporary(); + const std::string value = AllocVectorTemporary(); + for (std::size_t i = 0; i < num_coords; ++i) { + AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); + } + for (std::size_t i = 0; i < num_values; ++i) { + AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); + } + + AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, + image_id, ImageType(meta.image.type)); + return fmt::format("{}.x", coord); + } + + template <const std::string_view& op, const std::string_view& type> + std::string Atomic(Operation operation) { + std::string temporary = AllocTemporary(); + std::string address; + std::string_view opname; + if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), + Visit(gmem->GetBaseAddress())); + address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); + opname = "ATOMB"; + } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { + address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); + opname = "ATOMS"; + } else { + UNREACHABLE(); + return "{0, 0, 0, 0}"; + } + AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); + return temporary; + } + + template <char type> + std::string Negate(Operation operation) { + std::string temporary = AllocTemporary(); + if constexpr (type == 'F') { + AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); + } else { + AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); + } + return temporary; + } + + template <char type> + std::string Absolute(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); + return temporary; + } + + template <char type> + std::string BitfieldInsert(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); + AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); + AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), + Visit(operation[0])); + return fmt::format("{}.x", temporary); + } + + template <char type> + std::string BitfieldExtract(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); + AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); + AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); + return fmt::format("{}.x", temporary); + } + + template <char swizzle> + std::string LocalInvocationId(Operation) { + return fmt::format("invocation.localid.{}", swizzle); + } + + template <char swizzle> + std::string WorkGroupId(Operation) { + return fmt::format("invocation.groupid.{}", swizzle); + } + + template <char c1, char c2> + std::string ThreadMask(Operation) { + return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); + } + + template <typename... Args> + void AddExpression(std::string_view text, Args&&... args) { + shader_source += fmt::format(text, std::forward<Args>(args)...); + } + + template <typename... Args> + void AddLine(std::string_view text, Args&&... args) { + AddExpression(text, std::forward<Args>(args)...); + shader_source += '\n'; + } + + std::string AllocTemporary() { + max_temporaries = std::max(max_temporaries, num_temporaries + 1); + return fmt::format("T{}.x", num_temporaries++); + } + + std::string AllocVectorTemporary() { + max_temporaries = std::max(max_temporaries, num_temporaries + 1); + return fmt::format("T{}", num_temporaries++); + } + + void ResetTemporaries() noexcept { + num_temporaries = 0; + } + + const Device& device; + const ShaderIR& ir; + const Registry& registry; + const ShaderType stage; + + std::size_t num_temporaries = 0; + std::size_t max_temporaries = 0; + + std::string shader_source; + + static constexpr std::string_view ADD_F32 = "ADD.F32"; + static constexpr std::string_view ADD_S = "ADD.S"; + static constexpr std::string_view ADD_U = "ADD.U"; + static constexpr std::string_view MUL_F32 = "MUL.F32"; + static constexpr std::string_view MUL_S = "MUL.S"; + static constexpr std::string_view MUL_U = "MUL.U"; + static constexpr std::string_view DIV_F32 = "DIV.F32"; + static constexpr std::string_view DIV_S = "DIV.S"; + static constexpr std::string_view DIV_U = "DIV.U"; + static constexpr std::string_view MAD_F32 = "MAD.F32"; + static constexpr std::string_view RSQ_F32 = "RSQ.F32"; + static constexpr std::string_view COS_F32 = "COS.F32"; + static constexpr std::string_view SIN_F32 = "SIN.F32"; + static constexpr std::string_view EX2_F32 = "EX2.F32"; + static constexpr std::string_view LG2_F32 = "LG2.F32"; + static constexpr std::string_view SLT_F = "SLT.F32"; + static constexpr std::string_view SLT_S = "SLT.S"; + static constexpr std::string_view SLT_U = "SLT.U"; + static constexpr std::string_view SEQ_F = "SEQ.F32"; + static constexpr std::string_view SEQ_S = "SEQ.S"; + static constexpr std::string_view SEQ_U = "SEQ.U"; + static constexpr std::string_view SLE_F = "SLE.F32"; + static constexpr std::string_view SLE_S = "SLE.S"; + static constexpr std::string_view SLE_U = "SLE.U"; + static constexpr std::string_view SGT_F = "SGT.F32"; + static constexpr std::string_view SGT_S = "SGT.S"; + static constexpr std::string_view SGT_U = "SGT.U"; + static constexpr std::string_view SNE_F = "SNE.F32"; + static constexpr std::string_view SNE_S = "SNE.S"; + static constexpr std::string_view SNE_U = "SNE.U"; + static constexpr std::string_view SGE_F = "SGE.F32"; + static constexpr std::string_view SGE_S = "SGE.S"; + static constexpr std::string_view SGE_U = "SGE.U"; + static constexpr std::string_view AND_S = "AND.S"; + static constexpr std::string_view AND_U = "AND.U"; + static constexpr std::string_view TRUNC_F = "TRUNC.F"; + static constexpr std::string_view TRUNC_S = "TRUNC.S"; + static constexpr std::string_view TRUNC_U = "TRUNC.U"; + static constexpr std::string_view SHL_S = "SHL.S"; + static constexpr std::string_view SHL_U = "SHL.U"; + static constexpr std::string_view SHR_S = "SHR.S"; + static constexpr std::string_view SHR_U = "SHR.U"; + static constexpr std::string_view OR_S = "OR.S"; + static constexpr std::string_view OR_U = "OR.U"; + static constexpr std::string_view XOR_S = "XOR.S"; + static constexpr std::string_view XOR_U = "XOR.U"; + static constexpr std::string_view NOT_S = "NOT.S"; + static constexpr std::string_view NOT_U = "NOT.U"; + static constexpr std::string_view BTC_S = "BTC.S"; + static constexpr std::string_view BTC_U = "BTC.U"; + static constexpr std::string_view BTFM_S = "BTFM.S"; + static constexpr std::string_view BTFM_U = "BTFM.U"; + static constexpr std::string_view ROUND_F = "ROUND.F"; + static constexpr std::string_view CEIL_F = "CEIL.F"; + static constexpr std::string_view FLR_F = "FLR.F"; + static constexpr std::string_view I2F_S = "I2F.S"; + static constexpr std::string_view I2F_U = "I2F.U"; + static constexpr std::string_view MIN_F = "MIN.F"; + static constexpr std::string_view MIN_S = "MIN.S"; + static constexpr std::string_view MIN_U = "MIN.U"; + static constexpr std::string_view MAX_F = "MAX.F"; + static constexpr std::string_view MAX_S = "MAX.S"; + static constexpr std::string_view MAX_U = "MAX.U"; + static constexpr std::string_view MOV_U = "MOV.U"; + static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; + static constexpr std::string_view TGALL_U = "TGALL.U"; + static constexpr std::string_view TGANY_U = "TGANY.U"; + static constexpr std::string_view TGEQ_U = "TGEQ.U"; + static constexpr std::string_view EXCH = "EXCH"; + static constexpr std::string_view ADD = "ADD"; + static constexpr std::string_view MIN = "MIN"; + static constexpr std::string_view MAX = "MAX"; + static constexpr std::string_view AND = "AND"; + static constexpr std::string_view OR = "OR"; + static constexpr std::string_view XOR = "XOR"; + static constexpr std::string_view U32 = "U32"; + static constexpr std::string_view S32 = "S32"; + + static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); + using DecompilerType = std::string (ARBDecompiler::*)(Operation); + static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { + &ARBDecompiler::Assign, + + &ARBDecompiler::Select, + + &ARBDecompiler::Binary<ADD_F32>, + &ARBDecompiler::Binary<MUL_F32>, + &ARBDecompiler::Binary<DIV_F32>, + &ARBDecompiler::Trinary<MAD_F32>, + &ARBDecompiler::Negate<'F'>, + &ARBDecompiler::Absolute<'F'>, + &ARBDecompiler::FClamp, + &ARBDecompiler::FCastHalf0, + &ARBDecompiler::FCastHalf1, + &ARBDecompiler::Binary<MIN_F>, + &ARBDecompiler::Binary<MAX_F>, + &ARBDecompiler::Unary<COS_F32>, + &ARBDecompiler::Unary<SIN_F32>, + &ARBDecompiler::Unary<EX2_F32>, + &ARBDecompiler::Unary<LG2_F32>, + &ARBDecompiler::Unary<RSQ_F32>, + &ARBDecompiler::FSqrt, + &ARBDecompiler::Unary<ROUND_F>, + &ARBDecompiler::Unary<FLR_F>, + &ARBDecompiler::Unary<CEIL_F>, + &ARBDecompiler::Unary<TRUNC_F>, + &ARBDecompiler::Unary<I2F_S>, + &ARBDecompiler::Unary<I2F_U>, + &ARBDecompiler::FSwizzleAdd, + + &ARBDecompiler::Binary<ADD_S>, + &ARBDecompiler::Binary<MUL_S>, + &ARBDecompiler::Binary<DIV_S>, + &ARBDecompiler::Negate<'S'>, + &ARBDecompiler::Absolute<'S'>, + &ARBDecompiler::Binary<MIN_S>, + &ARBDecompiler::Binary<MAX_S>, + + &ARBDecompiler::Unary<TRUNC_S>, + &ARBDecompiler::Unary<MOV_U>, + &ARBDecompiler::Binary<SHL_S>, + &ARBDecompiler::Binary<SHR_U>, + &ARBDecompiler::Binary<SHR_S>, + &ARBDecompiler::Binary<AND_S>, + &ARBDecompiler::Binary<OR_S>, + &ARBDecompiler::Binary<XOR_S>, + &ARBDecompiler::Unary<NOT_S>, + &ARBDecompiler::BitfieldInsert<'S'>, + &ARBDecompiler::BitfieldExtract<'S'>, + &ARBDecompiler::Unary<BTC_S>, + &ARBDecompiler::Unary<BTFM_S>, + + &ARBDecompiler::Binary<ADD_U>, + &ARBDecompiler::Binary<MUL_U>, + &ARBDecompiler::Binary<DIV_U>, + &ARBDecompiler::Binary<MIN_U>, + &ARBDecompiler::Binary<MAX_U>, + &ARBDecompiler::Unary<TRUNC_U>, + &ARBDecompiler::Unary<MOV_U>, + &ARBDecompiler::Binary<SHL_U>, + &ARBDecompiler::Binary<SHR_U>, + &ARBDecompiler::Binary<SHR_U>, + &ARBDecompiler::Binary<AND_U>, + &ARBDecompiler::Binary<OR_U>, + &ARBDecompiler::Binary<XOR_U>, + &ARBDecompiler::Unary<NOT_U>, + &ARBDecompiler::BitfieldInsert<'U'>, + &ARBDecompiler::BitfieldExtract<'U'>, + &ARBDecompiler::Unary<BTC_U>, + &ARBDecompiler::Unary<BTFM_U>, + + &ARBDecompiler::HAdd2, + &ARBDecompiler::HMul2, + &ARBDecompiler::HFma2, + &ARBDecompiler::HAbsolute, + &ARBDecompiler::HNegate, + &ARBDecompiler::HClamp, + &ARBDecompiler::HCastFloat, + &ARBDecompiler::HUnpack, + &ARBDecompiler::HMergeF32, + &ARBDecompiler::HMergeH0, + &ARBDecompiler::HMergeH1, + &ARBDecompiler::HPack2, + + &ARBDecompiler::LogicalAssign, + &ARBDecompiler::Binary<AND_U>, + &ARBDecompiler::Binary<OR_U>, + &ARBDecompiler::Binary<XOR_U>, + &ARBDecompiler::Unary<NOT_U>, + &ARBDecompiler::LogicalPick2, + &ARBDecompiler::LogicalAnd2, + + &ARBDecompiler::FloatComparison<SLT_F, false>, + &ARBDecompiler::FloatComparison<SEQ_F, false>, + &ARBDecompiler::FloatComparison<SLE_F, false>, + &ARBDecompiler::FloatComparison<SGT_F, false>, + &ARBDecompiler::FloatComparison<SNE_F, false>, + &ARBDecompiler::FloatComparison<SGE_F, false>, + &ARBDecompiler::FloatOrdered, + &ARBDecompiler::FloatUnordered, + &ARBDecompiler::FloatComparison<SLT_F, true>, + &ARBDecompiler::FloatComparison<SEQ_F, true>, + &ARBDecompiler::FloatComparison<SLE_F, true>, + &ARBDecompiler::FloatComparison<SGT_F, true>, + &ARBDecompiler::FloatComparison<SNE_F, true>, + &ARBDecompiler::FloatComparison<SGE_F, true>, + + &ARBDecompiler::Binary<SLT_S>, + &ARBDecompiler::Binary<SEQ_S>, + &ARBDecompiler::Binary<SLE_S>, + &ARBDecompiler::Binary<SGT_S>, + &ARBDecompiler::Binary<SNE_S>, + &ARBDecompiler::Binary<SGE_S>, + + &ARBDecompiler::Binary<SLT_U>, + &ARBDecompiler::Binary<SEQ_U>, + &ARBDecompiler::Binary<SLE_U>, + &ARBDecompiler::Binary<SGT_U>, + &ARBDecompiler::Binary<SNE_U>, + &ARBDecompiler::Binary<SGE_U>, + + &ARBDecompiler::LogicalAddCarry, + + &ARBDecompiler::HalfComparison<SLT_F, false>, + &ARBDecompiler::HalfComparison<SEQ_F, false>, + &ARBDecompiler::HalfComparison<SLE_F, false>, + &ARBDecompiler::HalfComparison<SGT_F, false>, + &ARBDecompiler::HalfComparison<SNE_F, false>, + &ARBDecompiler::HalfComparison<SGE_F, false>, + &ARBDecompiler::HalfComparison<SLT_F, true>, + &ARBDecompiler::HalfComparison<SEQ_F, true>, + &ARBDecompiler::HalfComparison<SLE_F, true>, + &ARBDecompiler::HalfComparison<SGT_F, true>, + &ARBDecompiler::HalfComparison<SNE_F, true>, + &ARBDecompiler::HalfComparison<SGE_F, true>, + + &ARBDecompiler::Texture, + &ARBDecompiler::Texture, + &ARBDecompiler::TextureGather, + &ARBDecompiler::TextureQueryDimensions, + &ARBDecompiler::TextureQueryLod, + &ARBDecompiler::TexelFetch, + &ARBDecompiler::TextureGradient, + + &ARBDecompiler::ImageLoad, + &ARBDecompiler::ImageStore, + + &ARBDecompiler::AtomicImage<ADD, U32>, + &ARBDecompiler::AtomicImage<AND, U32>, + &ARBDecompiler::AtomicImage<OR, U32>, + &ARBDecompiler::AtomicImage<XOR, U32>, + &ARBDecompiler::AtomicImage<EXCH, U32>, + + &ARBDecompiler::Atomic<EXCH, U32>, + &ARBDecompiler::Atomic<ADD, U32>, + &ARBDecompiler::Atomic<MIN, U32>, + &ARBDecompiler::Atomic<MAX, U32>, + &ARBDecompiler::Atomic<AND, U32>, + &ARBDecompiler::Atomic<OR, U32>, + &ARBDecompiler::Atomic<XOR, U32>, + + &ARBDecompiler::Atomic<EXCH, S32>, + &ARBDecompiler::Atomic<ADD, S32>, + &ARBDecompiler::Atomic<MIN, S32>, + &ARBDecompiler::Atomic<MAX, S32>, + &ARBDecompiler::Atomic<AND, S32>, + &ARBDecompiler::Atomic<OR, S32>, + &ARBDecompiler::Atomic<XOR, S32>, + + &ARBDecompiler::Atomic<ADD, U32>, + &ARBDecompiler::Atomic<MIN, U32>, + &ARBDecompiler::Atomic<MAX, U32>, + &ARBDecompiler::Atomic<AND, U32>, + &ARBDecompiler::Atomic<OR, U32>, + &ARBDecompiler::Atomic<XOR, U32>, + + &ARBDecompiler::Atomic<ADD, S32>, + &ARBDecompiler::Atomic<MIN, S32>, + &ARBDecompiler::Atomic<MAX, S32>, + &ARBDecompiler::Atomic<AND, S32>, + &ARBDecompiler::Atomic<OR, S32>, + &ARBDecompiler::Atomic<XOR, S32>, + + &ARBDecompiler::Branch, + &ARBDecompiler::BranchIndirect, + &ARBDecompiler::PushFlowStack, + &ARBDecompiler::PopFlowStack, + &ARBDecompiler::Exit, + &ARBDecompiler::Discard, + + &ARBDecompiler::EmitVertex, + &ARBDecompiler::EndPrimitive, + + &ARBDecompiler::InvocationId, + &ARBDecompiler::YNegate, + &ARBDecompiler::LocalInvocationId<'x'>, + &ARBDecompiler::LocalInvocationId<'y'>, + &ARBDecompiler::LocalInvocationId<'z'>, + &ARBDecompiler::WorkGroupId<'x'>, + &ARBDecompiler::WorkGroupId<'y'>, + &ARBDecompiler::WorkGroupId<'z'>, + + &ARBDecompiler::Unary<TGBALLOT_U>, + &ARBDecompiler::Unary<TGALL_U>, + &ARBDecompiler::Unary<TGANY_U>, + &ARBDecompiler::Unary<TGEQ_U>, + + &ARBDecompiler::ThreadId, + &ARBDecompiler::ThreadMask<'e', 'q'>, + &ARBDecompiler::ThreadMask<'g', 'e'>, + &ARBDecompiler::ThreadMask<'g', 't'>, + &ARBDecompiler::ThreadMask<'l', 'e'>, + &ARBDecompiler::ThreadMask<'l', 't'>, + &ARBDecompiler::ShuffleIndexed, + + &ARBDecompiler::Barrier, + &ARBDecompiler::MemoryBarrierGroup, + &ARBDecompiler::MemoryBarrierGlobal, + }; +}; + +ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view identifier) + : device{device}, ir{ir}, registry{registry}, stage{stage} { + AddLine("TEMP RC;"); + AddLine("TEMP FSWZA[4];"); + AddLine("TEMP FSWZB[4];"); + if (ir.IsDecompiled()) { + DecompileAST(); + } else { + DecompileBranchMode(); + } + AddLine("END"); + + const std::string code = std::move(shader_source); + DeclareHeader(); + DeclareVertex(); + DeclareGeometry(); + DeclareFragment(); + DeclareCompute(); + DeclareInputAttributes(); + DeclareOutputAttributes(); + DeclareLocalMemory(); + DeclareGlobalMemory(); + DeclareConstantBuffers(); + DeclareRegisters(); + DeclareTemporaries(); + DeclarePredicates(); + DeclareInternalFlags(); + + shader_source += code; +} + +std::string_view HeaderStageName(ShaderType stage) { + switch (stage) { + case ShaderType::Vertex: + return "vp"; + case ShaderType::Geometry: + return "gp"; + case ShaderType::Fragment: + return "fp"; + case ShaderType::Compute: + return "cp"; + default: + UNREACHABLE(); + return ""; + } +} + +void ARBDecompiler::DeclareHeader() { + AddLine("!!NV{}5.0", HeaderStageName(stage)); + // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D + AddLine("OPTION NV_internal;"); + AddLine("OPTION NV_gpu_program_fp64;"); + AddLine("OPTION NV_shader_storage_buffer;"); + AddLine("OPTION NV_shader_thread_group;"); + if (ir.UsesWarps() && device.HasWarpIntrinsics()) { + AddLine("OPTION NV_shader_thread_shuffle;"); + } + if (stage == ShaderType::Vertex) { + if (device.HasNvViewportArray2()) { + AddLine("OPTION NV_viewport_array2;"); + } + } + if (stage == ShaderType::Fragment) { + AddLine("OPTION ARB_draw_buffers;"); + } + if (device.HasImageLoadFormatted()) { + AddLine("OPTION EXT_shader_image_load_formatted;"); + } +} + +void ARBDecompiler::DeclareVertex() { + if (stage != ShaderType::Vertex) { + return; + } + AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); +} + +void ARBDecompiler::DeclareGeometry() { + if (stage != ShaderType::Geometry) { + return; + } + const auto& info = registry.GetGraphicsInfo(); + const auto& header = ir.GetHeader(); + AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); + AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); + AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); + AddLine("ATTRIB vertex_position = vertex.position;"); +} + +void ARBDecompiler::DeclareFragment() { + if (stage != ShaderType::Fragment) { + return; + } + AddLine("OUTPUT result_color7 = result.color[7];"); + AddLine("OUTPUT result_color6 = result.color[6];"); + AddLine("OUTPUT result_color5 = result.color[5];"); + AddLine("OUTPUT result_color4 = result.color[4];"); + AddLine("OUTPUT result_color3 = result.color[3];"); + AddLine("OUTPUT result_color2 = result.color[2];"); + AddLine("OUTPUT result_color1 = result.color[1];"); + AddLine("OUTPUT result_color0 = result.color;"); +} + +void ARBDecompiler::DeclareCompute() { + if (stage != ShaderType::Compute) { + return; + } + const ComputeInfo& info = registry.GetComputeInfo(); + AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], + info.workgroup_size[2]); + if (info.shared_memory_size_in_words > 0) { + const u32 size_in_bytes = info.shared_memory_size_in_words * 4; + AddLine("SHARED_MEMORY {};", size_in_bytes); + AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); + } +} + +void ARBDecompiler::DeclareInputAttributes() { + if (stage == ShaderType::Compute) { + return; + } + const std::string_view stage_name = StageInputName(stage); + for (const auto attribute : ir.GetInputAttributes()) { + if (!IsGenericAttribute(attribute)) { + continue; + } + const u32 index = GetGenericAttributeIndex(attribute); + + std::string_view suffix; + if (stage == ShaderType::Fragment) { + const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; + if (input_mode == PixelImap::Unused) { + return; + } + suffix = GetInputFlags(input_mode); + } + AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, + index); + } +} + +void ARBDecompiler::DeclareOutputAttributes() { + if (stage == ShaderType::Compute) { + return; + } + for (const auto attribute : ir.GetOutputAttributes()) { + if (!IsGenericAttribute(attribute)) { + continue; + } + const u32 index = GetGenericAttributeIndex(attribute); + AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); + } +} + +void ARBDecompiler::DeclareLocalMemory() { + u64 size = 0; + if (stage == ShaderType::Compute) { + size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; + } else { + size = ir.GetHeader().GetLocalMemorySize(); + } + if (size == 0) { + return; + } + const u64 element_count = Common::AlignUp(size, 4) / 4; + AddLine("TEMP lmem[{}];", element_count); +} + +void ARBDecompiler::DeclareGlobalMemory() { + u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; + for (const auto& pair : ir.GetGlobalMemory()) { + const auto& base = pair.first; + AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); + ++binding; + } +} + +void ARBDecompiler::DeclareConstantBuffers() { + u32 binding = 0; + for (const auto& cbuf : ir.GetConstantBuffers()) { + AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); + ++binding; + } +} + +void ARBDecompiler::DeclareRegisters() { + for (const u32 gpr : ir.GetRegisters()) { + AddLine("TEMP R{};", gpr); + } +} + +void ARBDecompiler::DeclareTemporaries() { + for (std::size_t i = 0; i < max_temporaries; ++i) { + AddLine("TEMP T{};", i); + } +} + +void ARBDecompiler::DeclarePredicates() { + for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { + AddLine("TEMP P{};", static_cast<u64>(pred)); + } +} + +void ARBDecompiler::DeclareInternalFlags() { + for (const char* name : INTERNAL_FLAG_NAMES) { + AddLine("TEMP {};", name); + } +} + +void ARBDecompiler::InitializeVariables() { + AddLine("MOV.F32 FSWZA[0], -1;"); + AddLine("MOV.F32 FSWZA[1], 1;"); + AddLine("MOV.F32 FSWZA[2], -1;"); + AddLine("MOV.F32 FSWZA[3], 0;"); + AddLine("MOV.F32 FSWZB[0], -1;"); + AddLine("MOV.F32 FSWZB[1], -1;"); + AddLine("MOV.F32 FSWZB[2], 1;"); + AddLine("MOV.F32 FSWZB[3], -1;"); + + if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { + AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); + } + for (const auto attribute : ir.GetOutputAttributes()) { + if (!IsGenericAttribute(attribute)) { + continue; + } + const u32 index = GetGenericAttributeIndex(attribute); + AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); + } + for (const u32 gpr : ir.GetRegisters()) { + AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); + } + for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { + AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); + } +} + +void ARBDecompiler::DecompileAST() { + const u32 num_flow_variables = ir.GetASTNumVariables(); + for (u32 i = 0; i < num_flow_variables; ++i) { + AddLine("TEMP F{};", i); + } + for (u32 i = 0; i < num_flow_variables; ++i) { + AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); + } + + InitializeVariables(); + + VisitAST(ir.GetASTProgram()); +} + +void ARBDecompiler::DecompileBranchMode() { + static constexpr u32 FLOW_STACK_SIZE = 20; + if (!ir.IsFlowStackDisabled()) { + AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); + AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); + AddLine("TEMP SSY_TOP;"); + AddLine("TEMP PBK_TOP;"); + } + + AddLine("TEMP PC;"); + + if (!ir.IsFlowStackDisabled()) { + AddLine("MOV.U SSY_TOP.x, 0;"); + AddLine("MOV.U PBK_TOP.x, 0;"); + } + + InitializeVariables(); + + const auto basic_block_end = ir.GetBasicBlocks().end(); + auto basic_block_it = ir.GetBasicBlocks().begin(); + const u32 first_address = basic_block_it->first; + AddLine("MOV.U PC.x, {};", first_address); + + AddLine("REP;"); + + std::size_t num_blocks = 0; + while (basic_block_it != basic_block_end) { + const auto& [address, bb] = *basic_block_it; + ++num_blocks; + + AddLine("SEQ.S.CC RC.x, PC.x, {};", address); + AddLine("IF NE.x;"); + + VisitBlock(bb); + + ++basic_block_it; + + if (basic_block_it != basic_block_end) { + const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); + if (!op || op->GetCode() != OperationCode::Branch) { + const u32 next_address = basic_block_it->first; + AddLine("MOV.U PC.x, {};", next_address); + AddLine("CONT;"); + } + } + + AddLine("ELSE;"); + } + AddLine("RET;"); + while (num_blocks--) { + AddLine("ENDIF;"); + } + + AddLine("ENDREP;"); +} + +void ARBDecompiler::VisitAST(const ASTNode& node) { + if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { + for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + VisitAST(current); + } + } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { + const std::string condition = VisitExpression(ast->condition); + ResetTemporaries(); + + AddLine("MOVC.U RC.x, {};", condition); + AddLine("IF NE.x;"); + for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + VisitAST(current); + } + AddLine("ENDIF;"); + } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { + AddLine("ELSE;"); + for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + VisitAST(current); + } + } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { + VisitBlock(ast->nodes); + } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { + AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); + ResetTemporaries(); + } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { + const std::string condition = VisitExpression(ast->condition); + ResetTemporaries(); + AddLine("REP;"); + for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + VisitAST(current); + } + AddLine("MOVC.U RC.x, {};", condition); + AddLine("BRK (NE.x);"); + AddLine("ENDREP;"); + } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { + const bool is_true = ExprIsTrue(ast->condition); + if (!is_true) { + AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); + AddLine("IF NE.x;"); + ResetTemporaries(); + } + if (ast->kills) { + AddLine("KIL TR;"); + } else { + Exit(); + } + if (!is_true) { + AddLine("ENDIF;"); + } + } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { + if (ExprIsTrue(ast->condition)) { + AddLine("BRK;"); + } else { + AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); + AddLine("BRK (NE.x);"); + ResetTemporaries(); + } + } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { + // Nothing to do + } else { + UNREACHABLE(); + } +} + +std::string ARBDecompiler::VisitExpression(const Expr& node) { + if (const auto expr = std::get_if<ExprAnd>(&*node)) { + std::string result = AllocTemporary(); + AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), + VisitExpression(expr->operand2)); + return result; + } + if (const auto expr = std::get_if<ExprOr>(&*node)) { + std::string result = AllocTemporary(); + AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), + VisitExpression(expr->operand2)); + return result; + } + if (const auto expr = std::get_if<ExprNot>(&*node)) { + std::string result = AllocTemporary(); + AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); + return result; + } + if (const auto expr = std::get_if<ExprPredicate>(&*node)) { + return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); + } + if (const auto expr = std::get_if<ExprCondCode>(&*node)) { + return Visit(ir.GetConditionCode(expr->cc)); + } + if (const auto expr = std::get_if<ExprVar>(&*node)) { + return fmt::format("F{}.x", expr->var_index); + } + if (const auto expr = std::get_if<ExprBoolean>(&*node)) { + return expr->value ? "0xffffffff" : "0"; + } + if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { + std::string result = AllocTemporary(); + AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); + return result; + } + UNREACHABLE(); + return "0"; +} + +void ARBDecompiler::VisitBlock(const NodeBlock& bb) { + for (const auto& node : bb) { + Visit(node); + } +} + +std::string ARBDecompiler::Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { + if (const auto amend_index = operation->GetAmendIndex()) { + Visit(ir.GetAmendNode(*amend_index)); + } + const std::size_t index = static_cast<std::size_t>(operation->GetCode()); + if (index >= OPERATION_DECOMPILERS.size()) { + UNREACHABLE_MSG("Out of bounds operation: {}", index); + return {}; + } + const auto decompiler = OPERATION_DECOMPILERS[index]; + if (decompiler == nullptr) { + UNREACHABLE_MSG("Undefined operation: {}", index); + return {}; + } + return (this->*decompiler)(*operation); + } + + if (const auto gpr = std::get_if<GprNode>(&*node)) { + const u32 index = gpr->GetIndex(); + if (index == Register::ZeroIndex) { + return "{0, 0, 0, 0}.x"; + } + return fmt::format("R{}.x", index); + } + + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + return fmt::format("CV{}.x", cv->GetIndex()); + } + + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { + std::string temporary = AllocTemporary(); + AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); + return temporary; + } + + if (const auto predicate = std::get_if<PredicateNode>(&*node)) { + std::string temporary = AllocTemporary(); + switch (const auto index = predicate->GetIndex(); index) { + case Tegra::Shader::Pred::UnusedIndex: + AddLine("MOV.S {}, -1;", temporary); + break; + case Tegra::Shader::Pred::NeverExecute: + AddLine("MOV.S {}, 0;", temporary); + break; + default: + AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); + break; + } + if (predicate->IsNegated()) { + AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); + } + return temporary; + } + + if (const auto abuf = std::get_if<AbufNode>(&*node)) { + if (abuf->IsPhysicalBuffer()) { + UNIMPLEMENTED_MSG("Physical buffers are not implemented"); + return "{0, 0, 0, 0}.x"; + } + + const auto buffer_index = [this, &abuf]() -> std::string { + if (stage != ShaderType::Geometry) { + return ""; + } + return fmt::format("[{}]", Visit(abuf->GetBuffer())); + }; + + const Attribute::Index index = abuf->GetIndex(); + const u32 element = abuf->GetElement(); + const char swizzle = Swizzle(element); + switch (index) { + case Attribute::Index::Position: { + if (stage == ShaderType::Geometry) { + return fmt::format("{}_position[{}].{}", StageInputName(stage), + Visit(abuf->GetBuffer()), swizzle); + } else { + return fmt::format("{}.position.{}", StageInputName(stage), swizzle); + } + } + case Attribute::Index::TessCoordInstanceIDVertexID: + ASSERT(stage == ShaderType::Vertex); + switch (element) { + case 2: + return "vertex.instance"; + case 3: + return "vertex.id"; + } + UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); + break; + case Attribute::Index::PointCoord: + switch (element) { + case 0: + return "fragment.pointcoord.x"; + case 1: + return "fragment.pointcoord.y"; + } + UNIMPLEMENTED(); + break; + case Attribute::Index::FrontFacing: { + ASSERT(stage == ShaderType::Fragment); + ASSERT(element == 3); + const std::string temporary = AllocVectorTemporary(); + AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); + AddLine("MOV.U.CC RC.x, -RC;"); + AddLine("MOV.S {}.x, 0;", temporary); + AddLine("MOV.S {}.x (NE.x), -1;", temporary); + return fmt::format("{}.x", temporary); + } + default: + if (IsGenericAttribute(index)) { + if (stage == ShaderType::Geometry) { + return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), + Visit(abuf->GetBuffer()), swizzle); + } else { + return fmt::format("{}.attrib[{}].{}", StageInputName(stage), + GetGenericAttributeIndex(index), swizzle); + } + } + UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); + break; + } + return "{0, 0, 0, 0}.x"; + } + + if (const auto cbuf = std::get_if<CbufNode>(&*node)) { + std::string offset_string; + const auto& offset = cbuf->GetOffset(); + if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { + offset_string = std::to_string(imm->GetValue()); + } else { + offset_string = Visit(offset); + } + std::string temporary = AllocTemporary(); + AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); + return temporary; + } + + if (const auto gmem = std::get_if<GmemNode>(&*node)) { + std::string temporary = AllocTemporary(); + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), + Visit(gmem->GetBaseAddress())); + AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), + temporary); + return temporary; + } + + if (const auto lmem = std::get_if<LmemNode>(&*node)) { + std::string temporary = Visit(lmem->GetAddress()); + AddLine("SHR.U {}, {}, 2;", temporary, temporary); + AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); + return temporary; + } + + if (const auto smem = std::get_if<SmemNode>(&*node)) { + std::string temporary = Visit(smem->GetAddress()); + AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); + return temporary; + } + + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { + const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); + return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); + } + + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + if (const auto amend_index = conditional->GetAmendIndex()) { + Visit(ir.GetAmendNode(*amend_index)); + } + AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); + AddLine("IF NE.x;"); + VisitBlock(conditional->GetCode()); + AddLine("ENDIF;"); + return {}; + } + + if (const auto cmt = std::get_if<CommentNode>(&*node)) { + // Uncommenting this will generate invalid code. GLASM lacks comments. + // AddLine("// {}", cmt->GetText()); + return {}; + } + + UNIMPLEMENTED(); + return {}; +} + +std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + UNIMPLEMENTED_IF(meta.sampler.is_indexed); + UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && + meta.sampler.type == Tegra::Shader::TextureType::TextureCube); + + const std::size_t count = operation.GetOperandsCount(); + std::string temporary = AllocVectorTemporary(); + std::size_t i = 0; + for (; i < count; ++i) { + AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); + } + if (meta.sampler.is_array) { + AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); + } + if (meta.sampler.is_shadow) { + AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); + } + return {std::move(temporary), i}; +} + +std::string ARBDecompiler::BuildAoffi(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + if (meta.aoffi.empty()) { + return {}; + } + const std::string temporary = AllocVectorTemporary(); + std::size_t i = 0; + for (auto& node : meta.aoffi) { + AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); + } + return fmt::format(", offset({})", temporary); +} + +void ARBDecompiler::Exit() { + if (stage != ShaderType::Fragment) { + AddLine("RET;"); + return; + } + + const auto safe_get_register = [this](u32 reg) -> std::string { + // TODO(Rodrigo): Replace with contains once C++20 releases + const auto& used_registers = ir.GetRegisters(); + if (used_registers.find(reg) != used_registers.end()) { + return fmt::format("R{}.x", reg); + } + return "{0, 0, 0, 0}.x"; + }; + + const auto& header = ir.GetHeader(); + u32 current_reg = 0; + for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { + for (u32 component = 0; component < 4; ++component) { + if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { + continue; + } + AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), + safe_get_register(current_reg)); + ++current_reg; + } + } + if (header.ps.omap.depth) { + AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); + } + + AddLine("RET;"); +} + +std::string ARBDecompiler::Assign(Operation operation) { + const Node& dest = operation[0]; + const Node& src = operation[1]; + + std::string dest_name; + if (const auto gpr = std::get_if<GprNode>(&*dest)) { + if (gpr->GetIndex() == Register::ZeroIndex) { + // Writing to Register::ZeroIndex is a no op + return {}; + } + dest_name = fmt::format("R{}.x", gpr->GetIndex()); + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { + const u32 element = abuf->GetElement(); + const char swizzle = Swizzle(element); + switch (const Attribute::Index index = abuf->GetIndex()) { + case Attribute::Index::Position: + dest_name = fmt::format("result.position.{}", swizzle); + break; + case Attribute::Index::LayerViewportPointSize: + switch (element) { + case 0: + UNIMPLEMENTED(); + return {}; + case 1: + case 2: + if (!device.HasNvViewportArray2()) { + LOG_ERROR( + Render_OpenGL, + "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); + return {}; + } + dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; + break; + case 3: + dest_name = "result.pointsize.x"; + break; + } + break; + case Attribute::Index::ClipDistances0123: + dest_name = fmt::format("result.clip[{}].x", element); + break; + case Attribute::Index::ClipDistances4567: + dest_name = fmt::format("result.clip[{}].x", element + 4); + break; + default: + if (!IsGenericAttribute(index)) { + UNREACHABLE(); + return {}; + } + dest_name = + fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); + break; + } + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { + const std::string address = Visit(lmem->GetAddress()); + AddLine("SHR.U {}, {}, 2;", address, address); + dest_name = fmt::format("lmem[{}].x", address); + } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { + AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); + ResetTemporaries(); + return {}; + } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { + const std::string temporary = AllocTemporary(); + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), + Visit(gmem->GetBaseAddress())); + AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), + temporary); + ResetTemporaries(); + return {}; + } else { + UNREACHABLE(); + ResetTemporaries(); + return {}; + } + + AddLine("MOV.U {}, {};", dest_name, Visit(src)); + ResetTemporaries(); + return {}; +} + +std::string ARBDecompiler::Select(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), + Visit(operation[2])); + return temporary; +} + +std::string ARBDecompiler::FClamp(Operation operation) { + // 1.0f in hex, replace with std::bit_cast on C++20 + static constexpr u32 POSITIVE_ONE = 0x3f800000; + + std::string temporary = AllocTemporary(); + const Node& value = operation[0]; + const Node& low = operation[1]; + const Node& high = operation[2]; + const auto* const imm_low = std::get_if<ImmediateNode>(&*low); + const auto* const imm_high = std::get_if<ImmediateNode>(&*high); + if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { + AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); + } else { + AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); + AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); + } + return temporary; +} + +std::string ARBDecompiler::FCastHalf0(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::FCastHalf1(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); + AddLine("MOV {}.x, {}.y;", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::FSqrt(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); + AddLine("RCP.F32 {}, {};", temporary, temporary); + return temporary; +} + +std::string ARBDecompiler::FSwizzleAdd(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, + "NV_shader_thread_shuffle is missing. Kepler or better is required."); + AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); + return fmt::format("{}.x", temporary); + } + + AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); + AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); + AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); + AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); + AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); + AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); + AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HAdd2(Operation operation) { + const std::string tmp1 = AllocVectorTemporary(); + const std::string tmp2 = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); + AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); + AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); + AddLine("PK2H.F {}.x, {};", tmp1, tmp1); + return fmt::format("{}.x", tmp1); +} + +std::string ARBDecompiler::HMul2(Operation operation) { + const std::string tmp1 = AllocVectorTemporary(); + const std::string tmp2 = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); + AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); + AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); + AddLine("PK2H.F {}.x, {};", tmp1, tmp1); + return fmt::format("{}.x", tmp1); +} + +std::string ARBDecompiler::HFma2(Operation operation) { + const std::string tmp1 = AllocVectorTemporary(); + const std::string tmp2 = AllocVectorTemporary(); + const std::string tmp3 = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); + AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); + AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); + AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); + AddLine("PK2H.F {}.x, {};", tmp1, tmp1); + return fmt::format("{}.x", tmp1); +} + +std::string ARBDecompiler::HAbsolute(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); + AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HNegate(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); + AddLine("MOVC.S RC.x, {};", Visit(operation[1])); + AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); + AddLine("MOVC.S RC.x, {};", Visit(operation[2])); + AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HClamp(Operation operation) { + const std::string tmp1 = AllocVectorTemporary(); + const std::string tmp2 = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); + AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); + AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); + AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); + AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); + AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); + AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); + AddLine("PK2H.F {}.x, {};", tmp1, tmp1); + return fmt::format("{}.x", tmp1); +} + +std::string ARBDecompiler::HCastFloat(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); + AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HUnpack(Operation operation) { + const std::string operand = Visit(operation[0]); + switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { + case Tegra::Shader::HalfType::H0_H1: + return operand; + case Tegra::Shader::HalfType::F32: { + const std::string temporary = AllocVectorTemporary(); + AddLine("MOV.U {}.x, {};", temporary, operand); + AddLine("MOV.U {}.y, {}.x;", temporary, temporary); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); + } + case Tegra::Shader::HalfType::H0_H0: { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, operand); + AddLine("MOV.U {}.y, {}.x;", temporary, temporary); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); + } + case Tegra::Shader::HalfType::H1_H1: { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, operand); + AddLine("MOV.U {}.x, {}.y;", temporary, temporary); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); + } + } + UNREACHABLE(); + return "{0, 0, 0, 0}.x"; +} + +std::string ARBDecompiler::HMergeF32(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HMergeH0(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); + AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); + AddLine("MOV.U {}.x, {}.z;", temporary, temporary); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HMergeH1(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); + AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); + AddLine("MOV.U {}.y, {}.w;", temporary, temporary); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::HPack2(Operation operation) { + const std::string temporary = AllocVectorTemporary(); + AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); + AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); + AddLine("PK2H.F {}.x, {};", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::LogicalAssign(Operation operation) { + const Node& dest = operation[0]; + const Node& src = operation[1]; + + std::string target; + + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { + ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); + + const Tegra::Shader::Pred index = pred->GetIndex(); + switch (index) { + case Tegra::Shader::Pred::NeverExecute: + case Tegra::Shader::Pred::UnusedIndex: + // Writing to these predicates is a no-op + return {}; + } + target = fmt::format("P{}.x", static_cast<u64>(index)); + } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { + const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); + target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); + } else { + UNREACHABLE(); + ResetTemporaries(); + return {}; + } + + AddLine("MOV.U {}, {};", target, Visit(src)); + ResetTemporaries(); + return {}; +} + +std::string ARBDecompiler::LogicalPick2(Operation operation) { + std::string temporary = AllocTemporary(); + const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); + AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); + return temporary; +} + +std::string ARBDecompiler::LogicalAnd2(Operation operation) { + std::string temporary = AllocTemporary(); + const std::string op = Visit(operation[0]); + AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); + return temporary; +} + +std::string ARBDecompiler::FloatOrdered(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); + AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); + AddLine("MOV.S {}, -1;", temporary); + AddLine("MOV.S {} (NAN.x), 0;", temporary); + AddLine("MOV.S {} (NAN.y), 0;", temporary); + return temporary; +} + +std::string ARBDecompiler::FloatUnordered(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); + AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); + AddLine("MOV.S {}, 0;", temporary); + AddLine("MOV.S {} (NAN.x), -1;", temporary); + AddLine("MOV.S {} (NAN.y), -1;", temporary); + return temporary; +} + +std::string ARBDecompiler::LogicalAddCarry(Operation operation) { + std::string temporary = AllocTemporary(); + AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); + AddLine("MOV.S {}, 0;", temporary); + AddLine("IF CF.x;"); + AddLine("MOV.S {}, -1;", temporary); + AddLine("ENDIF;"); + return temporary; +} + +std::string ARBDecompiler::Texture(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; + const auto [temporary, swizzle] = BuildCoords(operation); + + std::string_view opcode = "TEX"; + std::string extra; + if (meta.bias) { + ASSERT(!meta.lod); + opcode = "TXB"; + + if (swizzle < 4) { + AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); + } else { + const std::string bias = AllocTemporary(); + AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); + extra = fmt::format(" {},", bias); + } + } + if (meta.lod) { + ASSERT(!meta.bias); + opcode = "TXL"; + + if (swizzle < 4) { + AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); + } else { + const std::string lod = AllocTemporary(); + AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); + extra = fmt::format(" {},", lod); + } + } + + AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, + TextureType(meta), BuildAoffi(operation)); + AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::TextureGather(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; + const auto [temporary, swizzle] = BuildCoords(operation); + + std::string comp; + if (!meta.sampler.is_shadow) { + const auto& immediate = std::get<ImmediateNode>(*meta.component); + comp = fmt::format(".{}", Swizzle(immediate.GetValue())); + } + + AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, + TextureType(meta), BuildAoffi(operation)); + AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + const std::string temporary = AllocVectorTemporary(); + const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; + + ASSERT(!meta.sampler.is_array); + + const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; + AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); + AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::TextureQueryLod(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + const std::string temporary = AllocVectorTemporary(); + const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; + + ASSERT(!meta.sampler.is_array); + + const std::size_t count = operation.GetOperandsCount(); + for (std::size_t i = 0; i < count; ++i) { + AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); + } + AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); + AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); + AddLine("TRUNC.S {}, {};", temporary, temporary); + AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::TexelFetch(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; + const auto [temporary, swizzle] = BuildCoords(operation); + + if (!meta.sampler.is_buffer) { + ASSERT(swizzle < 4); + AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); + } + AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), + BuildAoffi(operation)); + AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::TextureGradient(Operation operation) { + const auto& meta = std::get<MetaTexture>(operation.GetMeta()); + const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; + const std::string ddx = AllocVectorTemporary(); + const std::string ddy = AllocVectorTemporary(); + const std::string coord = BuildCoords(operation).first; + + const std::size_t num_components = meta.derivates.size() / 2; + for (std::size_t index = 0; index < num_components; ++index) { + const char swizzle = Swizzle(index); + AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); + AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); + } + + const std::string_view result = coord; + AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, + TextureType(meta), BuildAoffi(operation)); + AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); + return fmt::format("{}.x", result); +} + +std::string ARBDecompiler::ImageLoad(Operation operation) { + const auto& meta = std::get<MetaImage>(operation.GetMeta()); + const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; + const std::size_t count = operation.GetOperandsCount(); + const std::string_view type = ImageType(meta.image.type); + + const std::string temporary = AllocVectorTemporary(); + for (std::size_t i = 0; i < count; ++i) { + AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); + } + AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); + AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::ImageStore(Operation operation) { + const auto& meta = std::get<MetaImage>(operation.GetMeta()); + const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; + const std::size_t num_coords = operation.GetOperandsCount(); + const std::size_t num_values = meta.values.size(); + const std::string_view type = ImageType(meta.image.type); + + const std::string coord = AllocVectorTemporary(); + const std::string value = AllocVectorTemporary(); + for (std::size_t i = 0; i < num_coords; ++i) { + AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); + } + for (std::size_t i = 0; i < num_values; ++i) { + AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); + } + AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); + return {}; +} + +std::string ARBDecompiler::Branch(Operation operation) { + const auto target = std::get<ImmediateNode>(*operation[0]); + AddLine("MOV.U PC.x, {};", target.GetValue()); + AddLine("CONT;"); + return {}; +} + +std::string ARBDecompiler::BranchIndirect(Operation operation) { + AddLine("MOV.U PC.x, {};", Visit(operation[0])); + AddLine("CONT;"); + return {}; +} + +std::string ARBDecompiler::PushFlowStack(Operation operation) { + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); + const std::string_view stack_name = StackName(stack); + AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); + AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); + return {}; +} + +std::string ARBDecompiler::PopFlowStack(Operation operation) { + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + const std::string_view stack_name = StackName(stack); + AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); + AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); + AddLine("CONT;"); + return {}; +} + +std::string ARBDecompiler::Exit(Operation) { + Exit(); + return {}; +} + +std::string ARBDecompiler::Discard(Operation) { + AddLine("KIL TR;"); + return {}; +} + +std::string ARBDecompiler::EmitVertex(Operation) { + AddLine("EMIT;"); + return {}; +} + +std::string ARBDecompiler::EndPrimitive(Operation) { + AddLine("ENDPRIM;"); + return {}; +} + +std::string ARBDecompiler::InvocationId(Operation) { + return "primitive.invocation"; +} + +std::string ARBDecompiler::YNegate(Operation) { + LOG_WARNING(Render_OpenGL, "(STUBBED)"); + const std::string temporary = AllocTemporary(); + AddLine("MOV.F {}, 1;", temporary); + return temporary; +} + +std::string ARBDecompiler::ThreadId(Operation) { + return fmt::format("{}.threadid", StageInputName(stage)); +} + +std::string ARBDecompiler::ShuffleIndexed(Operation operation) { + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, + "NV_shader_thread_shuffle is missing. Kepler or better is required."); + return Visit(operation[0]); + } + const std::string temporary = AllocVectorTemporary(); + AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), + Visit(operation[1])); + AddLine("MOV.U {}.x, {}.y;", temporary, temporary); + return fmt::format("{}.x", temporary); +} + +std::string ARBDecompiler::Barrier(Operation) { + if (!ir.IsDecompiled()) { + LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); + return {}; + } + AddLine("BAR;"); + return {}; +} + +std::string ARBDecompiler::MemoryBarrierGroup(Operation) { + AddLine("MEMBAR.CTA;"); + return {}; +} + +std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { + AddLine("MEMBAR;"); + return {}; +} + +} // Anonymous namespace + +std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + Tegra::Engines::ShaderType stage, std::string_view identifier) { + return ARBDecompiler(device, ir, registry, stage, identifier).Code(); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h new file mode 100644 index 000000000..6afc87220 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <string_view> + +#include "common/common_types.h" + +namespace Tegra::Engines { +enum class ShaderType : u32; +} + +namespace VideoCommon::Shader { +class ShaderIR; +class Registry; +} // namespace VideoCommon::Shader + +namespace OpenGL { + +class Device; + +std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + Tegra::Engines::ShaderType stage, std::string_view identifier); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 9964ea894..e461e4c70 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,22 +22,53 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } +} + +Buffer::~Buffer() = default; + +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { + glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), + data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { + MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); + const GLintptr gl_offset = static_cast<GLintptr>(offset); + if (read_buffer.handle == 0) { + read_buffer.Create(); + glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, + GL_STREAM_READ); + } + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); + glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); } -CachedBufferBlock::~CachedBufferBlock() = default; +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) { + glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +} OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { + const Device& device_, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, + std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, + device{device_} { if (!device.HasFastBufferSubData()) { return; } - static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); + static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); for (const GLuint cbuf : cbufs) { glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,44 +79,21 @@ OGLBufferCache::~OGLBufferCache() { glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); } -Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(cpu_addr, size); +std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<Buffer>(device, cpu_addr, size); } -GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { - return buffer->GetHandle(); -} - -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { - return 0; -} - -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset), - static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { + return {0, 0, 0}; } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); - const GLuint& cbuf = cbufs[cbuf_cursor++]; + const GLuint cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {cbuf, 0}; + return {cbuf, 0, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a9e86cfc7..88fdc0536 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -10,7 +10,6 @@ #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -24,57 +23,58 @@ class Device; class OGLStreamBuffer; class RasterizerOpenGL; -class CachedBufferBlock; +class Buffer : public VideoCommon::BufferBlock { +public: + explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); + ~Buffer(); -using Buffer = std::shared_ptr<CachedBufferBlock>; -using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; + void Upload(std::size_t offset, std::size_t size, const u8* data); -class CachedBufferBlock : public VideoCommon::BufferBlock { -public: - explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); - ~CachedBufferBlock(); + void Download(std::size_t offset, std::size_t size, u8* data); + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size); - GLuint GetHandle() const { + GLuint Handle() const noexcept { return gl_buffer.handle; } + u64 Address() const noexcept { + return gpu_address; + } + private: OGLBuffer gl_buffer; + OGLBuffer read_buffer; + u64 gpu_address = 0; }; +using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; class OGLBufferCache final : public GenericBufferCache { public: explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, const Device& device, std::size_t stream_size); ~OGLBufferCache(); - GLuint GetEmptyBuffer(std::size_t) override; + BufferInfo GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; } protected: - Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - - GLuint ToHandle(const Buffer& buffer) override; - - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; + std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: + static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + + const Device& device; + std::size_t cbuf_cursor = 0; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * - Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> - cbufs; + std::array<GLuint, NUM_CBUFS> cbufs{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 466a911db..c1f20f0ab 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -6,6 +6,7 @@ #include <array> #include <cstddef> #include <cstring> +#include <limits> #include <optional> #include <vector> @@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1; constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, - GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, - GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; +constexpr std::array LimitUBOs = { + GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, + GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, + GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; constexpr std::array LimitSSBOs = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, + GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; -constexpr std::array LimitSamplers = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS}; +constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS, + GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; -constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, - GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, - GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; +constexpr std::array LimitImages = { + GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, + GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, + GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; template <typename T> T GetInteger(GLenum pname) { @@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { return std::exchange(base, base + amount); } +std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { + std::array<u32, Tegra::Engines::MaxShaderTypes> max; + std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), + [](GLenum pname) { return GetInteger<u32>(pname); }); + return max; +} + std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; @@ -112,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); u32 base_images = 0; - // Reserve more image bindings on fragment and vertex stages. + // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. + // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the + // fragment stage, and at least 1 for the rest of the stages. + // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. + + // Reserve at least 4 image bindings on the fragment stage. bindings[4].image = - Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]); - bindings[0].image = - Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]); + Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); + + // This is guaranteed to be at least 1. + const u32 total_extracted_images = num_images / (NumStages - 1); // Reserve the other image bindings. - const u32 total_extracted_images = num_images / (NumStages - 2); - for (std::size_t i = 2; i < NumStages; ++i) { + for (std::size_t i = 0; i < NumStages; ++i) { const std::size_t stage = stage_swizzle[i]; + if (stage == 4) { + continue; + } bindings[stage].image = Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); } @@ -133,6 +152,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin } bool IsASTCSupported() { + static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, @@ -149,25 +169,44 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { - GLint supported; - glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, - &supported); - return supported == GL_TRUE; - }) == formats.end(); + static constexpr std::array required_support = { + GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, + GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, + }; + + for (const GLenum target : targets) { + for (const GLenum format : formats) { + for (const GLenum support : required_support) { + GLint value; + glGetInternalformativ(target, format, support, 1, &value); + if (value != GL_FULL_SUPPORT) { + return false; + } + } + } + } + return true; } } // Anonymous namespace -Device::Device() : base_bindings{BuildBaseBindings()} { +Device::Device() + : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); - const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); + const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); + const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; - const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; + + bool disable_fast_buffer_sub_data = false; + if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { + LOG_WARNING( + Render_OpenGL, + "Beta driver 443.24 is known to have issues. There might be performance issues."); + disable_fast_buffer_sub_data = true; + } uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); @@ -178,36 +217,43 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); + has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod"); has_astc = IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_broken_compute = is_intel_proprietary; - has_fast_buffer_sub_data = is_nvidia; - use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && - GLAD_GL_NV_compute_program5; + has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + + // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive + // uniform buffers as "push constants" + has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; + + use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && + GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); - if (Settings::values.use_assembly_shaders && !use_assembly_shaders) { + if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } } Device::Device(std::nullptr_t) { - uniform_buffer_alignment = 0; + max_uniform_buffers.fill(std::numeric_limits<u32>::max()); + uniform_buffer_alignment = 4; + shader_storage_alignment = 4; max_vertex_attributes = 16; max_varyings = 15; has_warp_intrinsics = true; has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; + has_texture_shadow_lod = true; has_variable_aoffi = true; - has_component_indexing_bug = false; - has_broken_compute = false; - has_precise_bug = false; } bool Device::TestVariableAoffi() { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e915dbd86..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -24,6 +24,10 @@ public: explicit Device(); explicit Device(std::nullptr_t); + u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { + return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; + } + const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { return base_bindings[stage_index]; } @@ -64,6 +68,14 @@ public: return has_image_load_formatted; } + bool HasTextureShadowLod() const { + return has_texture_shadow_lod; + } + + bool HasVertexBufferUnifiedMemory() const { + return has_vertex_buffer_unified_memory; + } + bool HasASTC() const { return has_astc; } @@ -80,14 +92,14 @@ public: return has_precise_bug; } - bool HasBrokenCompute() const { - return has_broken_compute; - } - bool HasFastBufferSubData() const { return has_fast_buffer_sub_data; } + bool HasNvViewportArray2() const { + return has_nv_viewport_array2; + } + bool UseAssemblyShaders() const { return use_assembly_shaders; } @@ -96,7 +108,8 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; + std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; + std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; std::size_t uniform_buffer_alignment{}; std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; @@ -105,12 +118,14 @@ private: bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; + bool has_texture_shadow_lod{}; + bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; - bool has_broken_compute{}; bool has_fast_buffer_sub_data{}; + bool has_nv_viewport_array2{}; bool use_assembly_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 716d43e65..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -30,6 +30,7 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/shader_cache.h" namespace OpenGL { @@ -54,15 +55,34 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { -constexpr std::size_t NumSupportedVertexAttributes = 16; +constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = + NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; +constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = + NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; + +constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, ShaderType shader_type, std::size_t index = 0) { + if constexpr (std::is_same_v<Entry, SamplerEntry>) { + if (entry.is_separated) { + const u32 buffer_1 = entry.buffer; + const u32 buffer_2 = entry.secondary_buffer; + const u32 offset_1 = entry.offset; + const u32 offset_2 = entry.secondary_offset; + const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); + return engine.GetTextureInfo(handle_1 | handle_2); + } + } if (entry.is_bindless) { - const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(tex_handle); + const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return engine.GetTextureInfo(handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { @@ -87,6 +107,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, return buffer.size; } +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", static_cast<int>(index)); + return {GL_POSITION, 0}; +} + void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } @@ -104,6 +152,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { CheckExtensions(); + unified_uniform_buffer.Create(); + glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); + if (device.UseAssemblyShaders()) { glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); for (const GLuint cbuf : staging_cbufs) { @@ -143,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() { // avoid OpenGL errors. // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't // assume every shader uses them all. - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexFormat0 + index]) { continue; } @@ -162,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() { if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { glVertexAttribIFormat(gl_index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); + MaxwellToGL::VertexFormat(attrib), attrib.offset); } else { - glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + glVertexAttribFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexFormat(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); } glVertexAttribBinding(gl_index, attrib.buffer); @@ -181,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() { MICROPROFILE_SCOPE(OpenGL_VB); + const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); + // Upload all guest vertex arrays sequentially to our buffer const auto& regs = gpu.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { if (!flags[Dirty::VertexBuffer0 + index]) { continue; } @@ -196,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() { const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end >= start); + + const GLuint gl_index = static_cast<GLuint>(index); const u64 size = end - start; if (size == 0) { - glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + if (use_unified_memory) { + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); + } continue; } - const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, - vertex_array.stride); + const auto info = buffer_cache.UploadMemory(start, size); + if (use_unified_memory) { + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, + info.address + info.offset, size); + } else { + glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); + } } } @@ -218,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() { flags[Dirty::VertexInstances] = false; const auto& regs = gpu.regs; - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexInstance0 + index]) { continue; } @@ -235,9 +298,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); - const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); - return offset; + const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); + return info.offset; } void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -273,7 +336,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - Shader shader{shader_cache.GetStageProgram(program)}; + Shader* const shader = shader_cache.GetStageProgram(program); if (device.UseAssemblyShaders()) { // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this @@ -567,7 +630,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); // Prepare the vertex array. - buffer_cache.Map(buffer_size); + const bool invalidated = buffer_cache.Map(buffer_size); + + if (invalidated) { + // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty + auto& dirty = gpu.dirty.flags; + dirty[Dirty::VertexBuffers] = true; + for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { + dirty[index] = true; + } + } // Prepare vertex array format. SetupVertexFormat(); @@ -584,9 +656,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; ubo.SetFromRegs(gpu); - const auto [buffer, offset] = + const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, static_cast<GLsizeiptr>(sizeof(ubo))); } @@ -655,10 +727,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - if (device.HasBrokenCompute()) { - return; - } - buffer_cache.Acquire(); current_cbuf = 0; @@ -837,7 +905,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { +void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { static constexpr std::array PARAMETER_LUT = { GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, @@ -846,41 +914,62 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + const auto& entries = shader->GetEntries(); + const bool use_unified = entries.use_unified_uniforms; + const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; - u32 binding = - device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; - for (const auto& entry : shader->GetEntries().const_buffers) { - const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); + const auto base_bindings = device.GetBaseBindings(stage_index); + u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; + for (const auto& entry : entries.const_buffers) { + const u32 index = entry.GetIndex(); + const auto& buffer = shader_stage.const_buffers[index]; + SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, + base_unified_offset + index * Maxwell::MaxConstBufferSize); + ++binding; + } + if (use_unified) { + const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + + entries.global_memory_entries.size()); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, + base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); } } -void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { +void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& entries = kernel->GetEntries(); + const bool use_unified = entries.use_unified_uniforms; u32 binding = 0; - for (const auto& entry : kernel->GetEntries().const_buffers) { + for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); Tegra::Engines::ConstBufferInfo buffer; buffer.address = config.Address(); buffer.size = config.size; buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); + SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, + use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); + ++binding; + } + if (use_unified) { + const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, + NUM_CONST_BUFFERS_BYTES_PER_STAGE); } } void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry) { + const ConstBufferEntry& entry, bool use_unified, + std::size_t unified_offset) { if (!buffer.enabled) { // Set values to zero to unbind buffers if (device.UseAssemblyShaders()) { glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); } return; } @@ -889,23 +978,33 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, // UBO alignment requirements. const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); - const auto alignment = device.GetUniformBufferAlignment(); - auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, - device.HasFastBufferSubData()); - if (!device.UseAssemblyShaders()) { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + const bool fast_upload = !use_unified && device.HasFastBufferSubData(); + + const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); + const GPUVAddr gpu_addr = buffer.address; + auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + + if (device.UseAssemblyShaders()) { + UNIMPLEMENTED_IF(use_unified); + if (info.offset != 0) { + const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; + glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); + info.handle = staging_cbuf; + info.offset = 0; + } + glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); return; } - if (offset != 0) { - const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + + if (use_unified) { + glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, + unified_offset, size); + } else { + glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); } -void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { +void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; @@ -920,7 +1019,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad } } -void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { +void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; @@ -937,13 +1036,12 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast<GLsizeiptr>(size)); } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { +void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; @@ -956,7 +1054,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& } } -void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { +void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; @@ -985,7 +1083,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu } } -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { +void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).image; for (const auto& entry : shader->GetEntries().images) { @@ -995,7 +1093,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh } } -void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { +void RasterizerOpenGL::SetupComputeImages(Shader* shader) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : shader->GetEntries().images) { @@ -1024,6 +1122,26 @@ void RasterizerOpenGL::SyncViewport() { const auto& regs = gpu.regs; const bool dirty_viewport = flags[Dirty::Viewports]; + const bool dirty_clip_control = flags[Dirty::ClipControl]; + + if (dirty_clip_control || flags[Dirty::FrontFace]) { + flags[Dirty::FrontFace] = false; + + GLenum mode = MaxwellToGL::FrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0 && + regs.viewport_transform[0].scale_y < 0.0f) { + switch (mode) { + case GL_CW: + mode = GL_CCW; + break; + case GL_CCW: + mode = GL_CW; + break; + } + } + glFrontFace(mode); + } + if (dirty_viewport || flags[Dirty::ClipControl]) { flags[Dirty::ClipControl] = false; @@ -1121,11 +1239,6 @@ void RasterizerOpenGL::SyncCullMode() { glDisable(GL_CULL_FACE); } } - - if (flags[Dirty::FrontFace]) { - flags[Dirty::FrontFace] = false; - glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); - } } void RasterizerOpenGL::SyncPrimitiveRestart() { @@ -1496,12 +1609,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); } +void RasterizerOpenGL::SyncTransformFeedback() { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs = system.GPU().Maxwell3D().regs; + + static constexpr std::size_t STRIDE = 3; + std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; + std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams; + + GLint* cursor = attribs.data(); + GLint* current_stream = streams.data(); + + for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + + *current_stream = static_cast<GLint>(feedback); + if (current_stream != streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional<u8> current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += STRIDE; + } + } + + const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE); + const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data()); + glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), + GL_INTERLEAVED_ATTRIBS); +} + void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { const auto& regs = system.GPU().Maxwell3D().regs; if (regs.tfb_enabled == 0) { return; } + if (device.UseAssemblyShaders()) { + SyncTransformFeedback(); + } + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1528,6 +1699,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { static_cast<GLsizeiptr>(size)); } + // We may have to call BeginTransformFeedbackNV here since they seem to call different + // implementations on Nvidia's driver (the pointer is different) but we are using + // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB + // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. glBeginTransformFeedback(GL_POINTS); } @@ -1549,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() { const GLuint handle = transform_feedback_buffers[index].handle; const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; - const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, + static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 87f7fe159..4f082592f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -19,7 +19,6 @@ #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_accelerated.h" -#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" @@ -100,40 +99,41 @@ private: void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); /// Configures the current constbuffers to use for the draw command. - void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); + void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); /// Configures the current constbuffers to use for the kernel invocation. - void SetupComputeConstBuffers(const Shader& kernel); + void SetupComputeConstBuffers(Shader* kernel); /// Configures a constant buffer. void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry); + const ConstBufferEntry& entry, bool use_unified, + std::size_t unified_offset); /// Configures the current global memory entries to use for the draw command. - void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); + void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); /// Configures the current global memory entries to use for the kernel invocation. - void SetupComputeGlobalMemory(const Shader& kernel); + void SetupComputeGlobalMemory(Shader* kernel); /// Configures a constant buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, const Shader& shader); + void SetupDrawTextures(std::size_t stage_index, Shader* shader); /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader& kernel); + void SetupComputeTextures(Shader* kernel); /// Configures a texture. void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, const Shader& shader); + void SetupDrawImages(std::size_t stage_index, Shader* shader); /// Configures images in a compute shader. - void SetupComputeImages(const Shader& shader); + void SetupComputeImages(Shader* shader); /// Configures an image. void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); @@ -201,6 +201,10 @@ private: /// Syncs the framebuffer sRGB state to match the guest state void SyncFramebufferSRGB(); + /// Syncs transform feedback state to match guest state + /// @note Only valid on assembly shaders + void SyncTransformFeedback(); + /// Begin a transform feedback void BeginTransformFeedback(GLenum primitive_mode); @@ -253,6 +257,7 @@ private: Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; std::size_t current_cbuf = 0; + OGLBuffer unified_uniform_buffer; /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cd0f36cf..c6a3bf3a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,6 +20,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -29,6 +30,7 @@ #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader_cache.h" namespace OpenGL { @@ -147,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u auto program = std::make_shared<ProgramHandle>(); if (device.UseAssemblyShaders()) { - const std::string arb = "Not implemented"; + const std::string arb = + DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); GLuint& arb_prog = program->assembly_program.handle; @@ -194,12 +197,9 @@ std::unordered_set<GLenum> GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, - std::shared_ptr<VideoCommon::Shader::Registry> registry, - ShaderEntries entries, ProgramSharedPtr program_) - : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, - size_in_bytes{size_in_bytes}, program{std::move(program_)} { - // Assign either the assembly program or source program. We can't have both. +Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, + ProgramSharedPtr program_) + : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; @@ -207,16 +207,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, ASSERT(handle != 0); } -CachedShader::~CachedShader() = default; +Shader::~Shader() = default; -GLuint CachedShader::GetHandle() const { +GLuint Shader::GetHandle() const { DEBUG_ASSERT(registry->IsConsistent()); return handle; } -Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b) { +std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, + Maxwell::ShaderProgram program_type, + ProgramCode code, ProgramCode code_b) { const auto shader_type = GetShaderType(program_type); const std::size_t size_in_bytes = code.size() * sizeof(u64); @@ -241,11 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader( - params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); + return std::unique_ptr<Shader>(new Shader( + std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); } -Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { +std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, + ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); auto& engine = params.system.GPU().KeplerCompute(); @@ -265,22 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader( - params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); + return std::unique_ptr<Shader>(new Shader(std::move(registry), + MakeEntries(params.device, ir, ShaderType::Compute), + std::move(program))); } -Shader CachedShader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader, - std::size_t size_in_bytes) { - return std::shared_ptr<CachedShader>( - new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, - precompiled_shader.entries, precompiled_shader.program)); +std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, + const PrecompiledShader& precompiled_shader) { + return std::unique_ptr<Shader>(new Shader( + precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device) - : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, - disk_cache{system} {} + : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, + emu_window{emu_window}, device{device}, disk_cache{system} {} + +ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, PrecompiledShader shader; shader.program = std::move(program); shader.registry = std::move(registry); - shader.entries = MakeEntries(ir); + shader.entries = MakeEntries(device, ir, entry.type); std::scoped_lock lock{mutex}; if (callback) { @@ -434,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( return program; } -Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { +Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { return last_shaders[static_cast<std::size_t>(program)]; } @@ -444,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { // Look up shader in the cache based on address const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; - Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; - if (shader) { + if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { return last_shaders[static_cast<std::size_t>(program)] = shader; } @@ -459,62 +460,64 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const u8* host_ptr_b = memory_manager.GetPointer(address_b); code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); } + const std::size_t code_size = code.size() * sizeof(u64); - const auto unique_identifier = GetUniqueIdentifier( + const u64 unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); const ShaderParameters params{system, disk_cache, device, *cpu_addr, host_ptr, unique_identifier}; + std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), - std::move(code_b)); + shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); } else { - const std::size_t size_in_bytes = code.size() * sizeof(u64); - shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); + shader = Shader::CreateFromCache(params, found->second); } + Shader* const result = shader.get(); if (cpu_addr) { - Register(shader); + Register(std::move(shader), *cpu_addr, code_size); } else { - null_shader = shader; + null_shader = std::move(shader); } - return last_shaders[static_cast<std::size_t>(program)] = shader; + return last_shaders[static_cast<std::size_t>(program)] = result; } -Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { +Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { auto& memory_manager{system.GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; - auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; - if (kernel) { + if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { return kernel; } const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one - auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; - const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; + ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; + const std::size_t code_size{code.size() * sizeof(u64)}; + const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const ShaderParameters params{system, disk_cache, device, *cpu_addr, host_ptr, unique_identifier}; + std::unique_ptr<Shader> kernel; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); + kernel = Shader::CreateKernelFromMemory(params, std::move(code)); } else { - const std::size_t size_in_bytes = code.size() * sizeof(u64); - kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); + kernel = Shader::CreateFromCache(params, found->second); } + Shader* const result = kernel.get(); if (cpu_addr) { - Register(kernel); + Register(std::move(kernel), *cpu_addr, code_size); } else { - null_kernel = kernel; + null_kernel = std::move(kernel); } - return kernel; + return result; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b2ae8d7f9..994aaeaf2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -18,12 +18,12 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" -#include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader_cache.h" namespace Core { class System; @@ -35,12 +35,9 @@ class EmuWindow; namespace OpenGL { -class CachedShader; class Device; class RasterizerOpenGL; -struct UnspecializedShader; -using Shader = std::shared_ptr<CachedShader>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ProgramHandle { @@ -64,62 +61,53 @@ struct ShaderParameters { u64 unique_identifier; }; -class CachedShader final : public RasterizerCacheObject { +class Shader final { public: - ~CachedShader(); + ~Shader(); /// Gets the GL program handle for the shader GLuint GetHandle() const; - /// Returns the size in bytes of the shader - std::size_t GetSizeInBytes() const override { - return size_in_bytes; - } - /// Gets the shader entries for the shader const ShaderEntries& GetEntries() const { return entries; } - static Shader CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b); - static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); + static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, + Maxwell::ShaderProgram program_type, + ProgramCode program_code, + ProgramCode program_code_b); + static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, + ProgramCode code); - static Shader CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader, - std::size_t size_in_bytes); + static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, + const PrecompiledShader& precompiled_shader); private: - explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, - std::shared_ptr<VideoCommon::Shader::Registry> registry, - ShaderEntries entries, ProgramSharedPtr program); + explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, + ProgramSharedPtr program); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; - std::size_t size_in_bytes = 0; ProgramSharedPtr program; GLuint handle = 0; }; -class ShaderCacheOpenGL final : public RasterizerCache<Shader> { +class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { public: explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device); + ~ShaderCacheOpenGL() override; /// Loads disk cache for the current game void LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program - Shader GetStageProgram(Maxwell::ShaderProgram program); + Shader* GetStageProgram(Maxwell::ShaderProgram program); /// Gets a compute kernel in the passed address - Shader GetComputeKernel(GPUVAddr code_addr); - -protected: - // We do not have to flush this cache as things in it are never modified by us. - void FlushObjectInner(const Shader& object) override {} + Shader* GetComputeKernel(GPUVAddr code_addr); private: ProgramSharedPtr GeneratePrecompiledProgram( @@ -132,10 +120,10 @@ private: ShaderDiskCacheOpenGL disk_cache; std::unordered_map<u64, PrecompiledShader> runtime_cache; - Shader null_shader{}; - Shader null_kernel{}; + std::unique_ptr<Shader> null_shader; + std::unique_ptr<Shader> null_kernel; - std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; + std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 9cb115959..2c49aeaac 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; +using Tegra::Shader::TextureType; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -61,8 +62,8 @@ struct TextureDerivates {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = - static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); +constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt #define ftou floatBitsToUint @@ -402,6 +403,13 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } +bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { + const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); + // We waste one UBO for emulation + const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; + return num_ubos > num_available_ubos; +} + struct GenericVaryingDescription { std::string name; u8 first_element = 0; @@ -412,8 +420,9 @@ class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier, std::string_view suffix) - : device{device}, ir{ir}, registry{registry}, stage{stage}, - identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { + : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, + suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ + UseUnifiedUniforms(device, ir, stage)} { if (stage != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); } @@ -518,6 +527,9 @@ private: if (device.HasImageLoadFormatted()) { code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); } + if (device.HasTextureShadowLod()) { + code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); + } if (device.HasWarpIntrinsics()) { code.AddLine("#extension GL_NV_gpu_shader5 : require"); code.AddLine("#extension GL_NV_shader_thread_group : require"); @@ -618,7 +630,9 @@ private: break; } } - if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { + + if (stage != ShaderType::Geometry && + (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { if (ir.UsesLayer()) { code.AddLine("int gl_Layer;"); } @@ -647,6 +661,16 @@ private: --code.scope; code.AddLine("}};"); code.AddNewLine(); + + if (stage == ShaderType::Geometry) { + if (ir.UsesLayer()) { + code.AddLine("out int gl_Layer;"); + } + if (ir.UsesViewportIndex()) { + code.AddLine("out int gl_ViewportIndex;"); + } + } + code.AddNewLine(); } void DeclareRegisters() { @@ -834,12 +858,24 @@ private: } void DeclareConstantBuffers() { + if (use_unified_uniforms) { + const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + + static_cast<u32>(ir.GetGlobalMemory().size()); + code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", + binding); + code.AddLine(" uint cbufs[];"); + code.AddLine("}};"); + code.AddNewLine(); + return; + } + u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& buffers : ir.GetConstantBuffers()) { - const auto index = buffers.first; + for (const auto [index, info] : ir.GetConstantBuffers()) { + const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; + const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); + code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); code.AddLine("}};"); code.AddNewLine(); } @@ -877,13 +913,13 @@ private: return "samplerBuffer"; } switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: + case TextureType::Texture1D: return "sampler1D"; - case Tegra::Shader::TextureType::Texture2D: + case TextureType::Texture2D: return "sampler2D"; - case Tegra::Shader::TextureType::Texture3D: + case TextureType::Texture3D: return "sampler3D"; - case Tegra::Shader::TextureType::TextureCube: + case TextureType::TextureCube: return "samplerCube"; default: UNREACHABLE(); @@ -1038,42 +1074,51 @@ private: if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); + const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; + if (use_unified_uniforms) { + return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), + Type::Uint}; + } else { + return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), + offset_imm / (4 * 4), (offset_imm / 4) % 4), + Type::Uint}; + } } - if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); + // Indirect access + if (use_unified_uniforms) { + return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, + Visit(offset).AsUint()), + Type::Uint}; + } - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } + const std::string final_offset = code.GenerateTemporary(); + code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, - pack, GetSwizzle(swizzle)); - } - return {result, Type::Uint}; + if (!device.HasComponentIndexingBug()) { + return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset), + Type::Uint}; } - UNREACHABLE_MSG("Unmanaged offset node type"); + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("uint {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, + GetSwizzle(swizzle)); + } + return {result, Type::Uint}; } if (const auto gmem = std::get_if<GmemNode>(&*node)) { @@ -1339,8 +1384,19 @@ private: const std::size_t count = operation.GetOperandsCount(); const bool has_array = meta->sampler.is_array; const bool has_shadow = meta->sampler.is_shadow; + const bool workaround_lod_array_shadow_as_grad = + !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube); + + std::string expr = "texture"; + + if (workaround_lod_array_shadow_as_grad) { + expr += "Grad"; + } else { + expr += function_suffix; + } - std::string expr = "texture" + function_suffix; if (!meta->aoffi.empty()) { expr += "Offset"; } else if (!meta->ptp.empty()) { @@ -1374,6 +1430,16 @@ private: expr += ')'; } + if (workaround_lod_array_shadow_as_grad) { + switch (meta->sampler.type) { + case TextureType::Texture2D: + return expr + ", vec2(0.0), vec2(0.0))"; + case TextureType::TextureCube: + return expr + ", vec3(0.0), vec3(0.0))"; + } + UNREACHABLE(); + } + for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); @@ -2000,8 +2066,19 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture( - operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + std::string expr{}; + + if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube)) { + LOG_ERROR(Render_OpenGL, + "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); + expr = GenerateTexture(operation, "Lod", {}); + } else { + expr = GenerateTexture(operation, "Lod", + {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + } + if (meta->sampler.is_shadow) { expr = "vec4(" + expr + ')'; } @@ -2710,6 +2787,7 @@ private: const std::string_view identifier; const std::string_view suffix; const Header header; + const bool use_unified_uniforms; std::unordered_map<u8, VaryingTFB> transform_feedback; ShaderWriter code; @@ -2905,7 +2983,7 @@ void GLSLDecompiler::DecompileAST() { } // Anonymous namespace -ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { +ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { ShaderEntries entries; for (const auto& cbuf : ir.GetConstantBuffers()) { entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), @@ -2926,6 +3004,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; } entries.shader_length = ir.GetLength(); + entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); return entries; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e8a178764..451c9689a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -53,11 +53,13 @@ struct ShaderEntries { std::vector<GlobalMemoryEntry> global_memory_entries; std::vector<SamplerEntry> samplers; std::vector<ImageEntry> images; - u32 clip_distances{}; std::size_t shader_length{}; + u32 clip_distances{}; + bool use_unified_uniforms{}; }; -ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); +ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + Tegra::Engines::ShaderType stage); std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, const VideoCommon::Shader::Registry& registry, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9e95a122b..2dcc2b0eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap; namespace { +using VideoCommon::Shader::SeparateSamplerKey; + using ShaderCacheVersionHash = std::array<u8, 64>; struct ConstBufferKey { @@ -37,18 +39,26 @@ struct ConstBufferKey { u32 value = 0; }; -struct BoundSamplerKey { +struct BoundSamplerEntry { u32 offset = 0; Tegra::Engines::SamplerDescriptor sampler; }; -struct BindlessSamplerKey { +struct SeparateSamplerEntry { + u32 cbuf1 = 0; + u32 cbuf2 = 0; + u32 offset1 = 0; + u32 offset2 = 0; + Tegra::Engines::SamplerDescriptor sampler; +}; + +struct BindlessSamplerEntry { u32 cbuf = 0; u32 offset = 0; Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 20; +constexpr u32 NativeVersion = 21; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { u32 texture_handler_size_value; u32 num_keys; u32 num_bound_samplers; + u32 num_separate_samplers; u32 num_bindless_samplers; if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || file.ReadArray(&is_texture_handler_size_known, 1) != 1 || file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_separate_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { return false; } @@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { } std::vector<ConstBufferKey> flat_keys(num_keys); - std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); - std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); + std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); + std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); + std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != flat_bound_samplers.size() || + file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) != + flat_separate_samplers.size() || file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != flat_bindless_samplers.size()) { return false; } - for (const auto& key : flat_keys) { - keys.insert({{key.cbuf, key.offset}, key.value}); + for (const auto& entry : flat_keys) { + keys.insert({{entry.cbuf, entry.offset}, entry.value}); } - for (const auto& key : flat_bound_samplers) { - bound_samplers.emplace(key.offset, key.sampler); + for (const auto& entry : flat_bound_samplers) { + bound_samplers.emplace(entry.offset, entry.sampler); } - for (const auto& key : flat_bindless_samplers) { - bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + for (const auto& entry : flat_separate_samplers) { + SeparateSamplerKey key; + key.buffers = {entry.cbuf1, entry.cbuf2}; + key.offsets = {entry.offset1, entry.offset2}; + separate_samplers.emplace(key, entry.sampler); + } + for (const auto& entry : flat_bindless_samplers) { + bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); } return true; @@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || file.WriteObject(static_cast<u32>(keys.size())) != 1 || file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || + file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { return false; } @@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); } - std::vector<BoundSamplerKey> flat_bound_samplers; + std::vector<BoundSamplerEntry> flat_bound_samplers; flat_bound_samplers.reserve(bound_samplers.size()); for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); + flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); + } + + std::vector<SeparateSamplerEntry> flat_separate_samplers; + flat_separate_samplers.reserve(separate_samplers.size()); + for (const auto& [key, sampler] : separate_samplers) { + SeparateSamplerEntry entry; + std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; + std::tie(entry.offset1, entry.offset2) = key.offsets; + entry.sampler = sampler; + flat_separate_samplers.push_back(entry); } - std::vector<BindlessSamplerKey> flat_bindless_samplers; + std::vector<BindlessSamplerEntry> flat_bindless_samplers; flat_bindless_samplers.reserve(bindless_samplers.size()); for (const auto& [address, sampler] : bindless_samplers) { flat_bindless_samplers.push_back( - BindlessSamplerKey{address.first, address.second, sampler}); + BindlessSamplerEntry{address.first, address.second, sampler}); } return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == flat_bound_samplers.size() && + file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) == + flat_separate_samplers.size() && file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == flat_bindless_samplers.size(); } @@ -179,7 +213,7 @@ ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { // Skip games without title id const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; - if (!Settings::values.use_disk_shader_cache || !has_title_id) { + if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index d5be52e40..a79cef0e9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry { VideoCommon::Shader::ComputeInfo compute_info; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::SeparateSamplerMap separate_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 6ec328c53..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,11 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <deque> +#include <tuple> #include <vector> + #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, - bool use_persistent) +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { gl_buffer.Create(); @@ -29,34 +30,22 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p allocate_size *= 2; } - if (use_persistent) { - persistent = true; - coherent = prefer_coherent; - const GLbitfield flags = - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); - mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( - gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); - } else { - glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); + static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; + glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + mapped_ptr = static_cast<u8*>( + glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } } OGLStreamBuffer::~OGLStreamBuffer() { - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } + glUnmapNamedBuffer(gl_buffer.handle); gl_buffer.Release(); } -GLuint OGLStreamBuffer::GetHandle() const { - return gl_buffer.handle; -} - -GLsizeiptr OGLStreamBuffer::GetSize() const { - return buffer_size; -} - std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { ASSERT(size <= buffer_size); ASSERT(alignment <= buffer_size); @@ -68,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a bool invalidate = false; if (buffer_pos + size > buffer_size) { + MICROPROFILE_SCOPE(OpenGL_StreamBuffer); + glInvalidateBufferData(gl_buffer.handle); + buffer_pos = 0; invalidate = true; - - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } } - if (invalidate || !persistent) { - MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | - (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | - (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); - mapped_ptr = static_cast<u8*>( - glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); - mapped_offset = buffer_pos; - } - - return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); + return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent && size > 0) { - glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); - } - - if (!persistent) { - glUnmapNamedBuffer(gl_buffer.handle); + if (size > 0) { + glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); } buffer_pos += size; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index f8383cbd4..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,15 +11,13 @@ namespace OpenGL { +class Device; + class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, - bool use_persistent = true); + explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); - GLuint GetHandle() const; - GLsizeiptr GetSize() const; - /* * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes * and the optional alignment requirement. @@ -32,15 +30,24 @@ public: void Unmap(GLsizeiptr size); + GLuint Handle() const { + return gl_buffer.handle; + } + + u64 Address() const { + return gpu_address; + } + + GLsizeiptr Size() const noexcept { + return buffer_size; + } + private: OGLBuffer gl_buffer; - bool coherent = false; - bool persistent = false; - + GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; - GLintptr mapped_offset = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 4faa8b90c..61505879b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format, texture_buffer); DecorateSurfaceName(); - main_view = CreateViewInner( - ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), - true); + + u32 num_layers = 1; + if (params.is_layered || params.target == SurfaceTarget::Texture3D) { + num_layers = params.depth; + } + + main_view = + CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); } CachedSurface::~CachedSurface() = default; @@ -404,8 +409,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy) - : VideoCommon::ViewBase(params), surface{surface}, - format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format}, + : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format}, target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { if (!is_proxy) { main_view = CreateTextureView(); @@ -414,20 +418,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; -void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { +void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { ASSERT(params.num_levels == 1); + if (params.target == SurfaceTarget::Texture3D) { + if (params.num_layers > 1) { + ASSERT(params.base_layer == 0); + glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); + } else { + glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, + params.base_level, params.base_layer); + } + return; + } + if (params.num_layers > 1) { - // Layered framebuffer attachments UNIMPLEMENTED_IF(params.base_layer != 0); - - switch (params.target) { - case SurfaceTarget::Texture2DArray: - glFramebufferTexture(target, attachment, GetTexture(), 0); - break; - default: - UNIMPLEMENTED(); - } + glFramebufferTexture(fb_target, attachment, GetTexture(), 0); return; } @@ -435,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { const GLuint texture = surface.GetTexture(); switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); + glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); + glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(target, attachment, texture, params.base_level, + glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, params.base_layer); break; default: @@ -501,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { OGLTextureView texture_view; texture_view.Create(); - glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, - params.num_levels, params.base_layer, params.num_layers); + if (target == GL_TEXTURE_3D) { + glTextureView(texture_view.handle, target, surface.texture.handle, format, + params.base_level, params.num_levels, 0, 1); + } else { + glTextureView(texture_view.handle, target, surface.texture.handle, format, + params.base_level, params.num_levels, params.base_layer, params.num_layers); + } ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); return texture_view; @@ -545,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) { const auto& src_params{src_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()}; - UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); - UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); + UNIMPLEMENTED_IF(src_params.depth != 1); + UNIMPLEMENTED_IF(dst_params.depth != 1); state_tracker.NotifyScissor0(); state_tracker.NotifyFramebuffer(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8a2ac8603..bfc4ddf5d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -80,8 +80,10 @@ public: explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); - /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER - void Attach(GLenum attachment, GLenum target) const; + /// @brief Attaches this texture view to the currently bound fb_target framebuffer + /// @param attachment Attachment to bind textures to + /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) + void Attach(GLenum attachment, GLenum fb_target) const; GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 994ae98eb..fe9bd4b5a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,10 +24,11 @@ namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -inline GLenum VertexType(Maxwell::VertexAttribute attrib) { +inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { - case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::UnsignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -46,12 +47,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } - case Maxwell::VertexAttribute::Type::SignedInt: + break; case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -70,10 +70,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } + break; case Maxwell::VertexAttribute::Type::Float: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_16: @@ -86,46 +84,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32: case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; - } - case Maxwell::VertexAttribute::Type::UnsignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_UNSIGNED_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_UNSIGNED_SHORT; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } - case Maxwell::VertexAttribute::Type::SignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_SHORT; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; - } - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); - return {}; + break; } + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(), + attrib.SizeString()); + return {}; } inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { @@ -137,8 +101,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { case Maxwell::IndexFormat::UnsignedInt: return GL_UNSIGNED_INT; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); - UNREACHABLE(); + UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format)); return {}; } @@ -180,33 +143,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { } inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, - Tegra::Texture::TextureMipmapFilter mip_filter_mode) { + Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) { switch (filter_mode) { - case Tegra::Texture::TextureFilter::Linear: { - switch (mip_filter_mode) { + case Tegra::Texture::TextureFilter::Nearest: + switch (mipmap_filter_mode) { case Tegra::Texture::TextureMipmapFilter::None: - return GL_LINEAR; + return GL_NEAREST; case Tegra::Texture::TextureMipmapFilter::Nearest: - return GL_LINEAR_MIPMAP_NEAREST; + return GL_NEAREST_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: - return GL_LINEAR_MIPMAP_LINEAR; + return GL_NEAREST_MIPMAP_LINEAR; } break; - } - case Tegra::Texture::TextureFilter::Nearest: { - switch (mip_filter_mode) { + case Tegra::Texture::TextureFilter::Linear: + switch (mipmap_filter_mode) { case Tegra::Texture::TextureMipmapFilter::None: - return GL_NEAREST; + return GL_LINEAR; case Tegra::Texture::TextureMipmapFilter::Nearest: - return GL_NEAREST_MIPMAP_NEAREST; + return GL_LINEAR_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: - return GL_NEAREST_MIPMAP_LINEAR; + return GL_LINEAR_MIPMAP_LINEAR; } break; } - } - LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); - return GL_LINEAR; + UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", + static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode)); + return GL_NEAREST; } inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { @@ -229,10 +191,15 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { } else { return GL_MIRROR_CLAMP_TO_EDGE; } - default: - LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); - return GL_REPEAT; + case Tegra::Texture::WrapMode::MirrorOnceClampOGL: + if (GL_EXT_texture_mirror_clamp) { + return GL_MIRROR_CLAMP_EXT; + } else { + return GL_MIRROR_CLAMP_TO_EDGE; + } } + UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); + return GL_REPEAT; } inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { @@ -254,8 +221,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { case Tegra::Texture::DepthCompareFunc::Always: return GL_ALWAYS; } - LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}", - static_cast<u32>(func)); + UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func)); return GL_GREATER; } @@ -277,7 +243,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::MaxGL: return GL_MAX; } - LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); return GL_FUNC_ADD; } @@ -341,7 +307,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } - LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); return GL_ZERO; } @@ -361,7 +327,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { case Tegra::Texture::SwizzleSource::OneFloat: return GL_ONE; } - LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source)); return GL_ZERO; } @@ -392,7 +358,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return GL_ALWAYS; } - LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); return GL_ALWAYS; } @@ -423,7 +389,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) { case Maxwell::StencilOp::DecrWrapOGL: return GL_DECR_WRAP; } - LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil)); return GL_KEEP; } @@ -434,7 +400,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) { case Maxwell::FrontFace::CounterClockWise: return GL_CCW; } - LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); + UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face)); return GL_CCW; } @@ -447,7 +413,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) { case Maxwell::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } - LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); + UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); return GL_BACK; } @@ -486,7 +452,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { case Maxwell::LogicOperation::Set: return GL_SET; } - LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation)); return GL_COPY; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b489e6db..e66cdc083 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -455,8 +455,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { frame_mailbox = std::make_unique<FrameMailbox>(); - glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, - 0.0f); + glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), + Settings::values.bg_blue.GetValue(), 0.0f); // Create shader programs OGLShader vertex_shader; @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } void RendererOpenGL::AddTelemetryFields() { @@ -552,8 +561,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { if (renderer_settings.set_background_color) { // Update background color before drawing - glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, - 0.0f); + glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), + Settings::values.bg_blue.GetValue(), 0.0f); } // Set projection matrix @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } glBindTextureUnit(0, screen_info.display_texture); glBindSampler(0, 0); @@ -751,8 +766,9 @@ void RendererOpenGL::RenderScreenshot() { } bool RendererOpenGL::Init() { - if (GLAD_GL_KHR_debug) { + if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glDebugMessageCallback(DebugHandler, nullptr); } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -107,6 +107,9 @@ private: OGLPipeline pipeline; OGLFramebuffer screenshot_framebuffer; + // GPU address of the vertex buffer + GLuint64EXT vertex_buffer_address = 0; + /// Display information for Switch screen ScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 568744e3c..d1f0ea932 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -39,53 +39,18 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace -void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { - raw = 0; - front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); - front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); - front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); - front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); - if (regs.stencil_two_side_enable) { - back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); - back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); - back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); - back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); - } else { - back.action_stencil_fail.Assign(front.action_stencil_fail); - back.action_depth_fail.Assign(front.action_depth_fail); - back.action_depth_pass.Assign(front.action_depth_pass); - back.test_func.Assign(front.test_func); - } - depth_test_enable.Assign(regs.depth_test_enable); - depth_write_enable.Assign(regs.depth_write_enabled); - depth_bounds_enable.Assign(regs.depth_bounds_enable); - stencil_enable.Assign(regs.stencil_enable); - depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); -} - -void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { +void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { const auto& clip = regs.view_volume_clip_control; const std::array enabled_lut = {regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable}; const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); - u32 packed_front_face = PackFrontFace(regs.front_face); - if (regs.screen_y_control.triangle_rast_flip != 0 && - regs.viewport_transform[0].scale_y > 0.0f) { - // Flip front face - packed_front_face = 1 - packed_front_face; - } - raw = 0; - topology.Assign(topology_index); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); - cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); - cull_face.Assign(PackCullFace(regs.cull_face)); - front_face.Assign(packed_front_face); polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); patch_control_points_minus_one.Assign(regs.patch_vertices - 1); tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value())); @@ -94,19 +59,37 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); + std::memcpy(&point_size, ®s.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast -} -void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept { + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + binding_divisors[index] = + regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; + } + + for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast<u32>(input.type.Value())); + attribute.size.Assign(static_cast<u32>(input.size.Value())); + } + for (std::size_t index = 0; index < std::size(attachments); ++index) { attachments[index].Fill(regs, index); } -} -void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept { const auto& transform = regs.viewport_transform; - std::transform(transform.begin(), transform.end(), swizzles.begin(), + std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(), [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); }); + + if (!has_extended_dynamic_state) { + no_extended_dynamic_state.Assign(1); + dynamic_state.Fill(regs); + } } void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { @@ -148,20 +131,57 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } -void FixedPipelineState::Fill(const Maxwell& regs) { - rasterizer.Fill(regs); - depth_stencil.Fill(regs); - color_blending.Fill(regs); - viewport_swizzles.Fill(regs); +void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { + const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); + u32 packed_front_face = PackFrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0) { + // Flip front face + packed_front_face = 1 - packed_front_face; + } + + raw1 = 0; + raw2 = 0; + front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); + front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); + front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); + front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); + if (regs.stencil_two_side_enable) { + back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); + back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); + back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); + back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); + } else { + back.action_stencil_fail.Assign(front.action_stencil_fail); + back.action_depth_fail.Assign(front.action_depth_fail); + back.action_depth_pass.Assign(front.action_depth_pass); + back.test_func.Assign(front.test_func); + } + stencil_enable.Assign(regs.stencil_enable); + depth_write_enable.Assign(regs.depth_write_enabled); + depth_bounds_enable.Assign(regs.depth_bounds_enable); + depth_test_enable.Assign(regs.depth_test_enable); + front_face.Assign(packed_front_face); + depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); + topology.Assign(topology_index); + cull_face.Assign(PackCullFace(regs.cull_face)); + cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); + + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const auto& input = regs.vertex_array[index]; + VertexBinding& binding = vertex_bindings[index]; + binding.raw = 0; + binding.enabled.Assign(input.IsEnabled() ? 1 : 0); + binding.stride.Assign(static_cast<u16>(input.stride.Value())); + } } std::size_t FixedPipelineState::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); return static_cast<std::size_t>(hash); } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { - return std::memcmp(this, &rhs, sizeof *this) == 0; + return std::memcmp(this, &rhs, Size()) == 0; } u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31a6398f2..cdcbb65f5 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,14 +60,6 @@ struct FixedPipelineState { void Fill(const Maxwell& regs, std::size_t index); - std::size_t Hash() const noexcept; - - bool operator==(const BlendingAttachment& rhs) const noexcept; - - bool operator!=(const BlendingAttachment& rhs) const noexcept { - return !operator==(rhs); - } - constexpr std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,156 +89,116 @@ struct FixedPipelineState { } }; - struct VertexInput { - union Binding { - u16 raw; - BitField<0, 1, u16> enabled; - BitField<1, 12, u16> stride; - }; + union VertexAttribute { + u32 raw; + BitField<0, 1, u32> enabled; + BitField<1, 5, u32> buffer; + BitField<6, 14, u32> offset; + BitField<20, 3, u32> type; + BitField<23, 6, u32> size; - union Attribute { - u32 raw; - BitField<0, 1, u32> enabled; - BitField<1, 5, u32> buffer; - BitField<6, 14, u32> offset; - BitField<20, 3, u32> type; - BitField<23, 6, u32> size; - - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { - return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); - } - - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { - return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); - } - }; - - std::array<Binding, Maxwell::NumVertexArrays> bindings; - std::array<u32, Maxwell::NumVertexArrays> binding_divisors; - std::array<Attribute, Maxwell::NumVertexAttributes> attributes; - - void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept { - auto& binding = bindings[index]; - binding.raw = 0; - binding.enabled.Assign(enabled ? 1 : 0); - binding.stride.Assign(static_cast<u16>(stride)); - binding_divisors[index] = divisor; + constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); } - void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset, - Maxwell::VertexAttribute::Type type, - Maxwell::VertexAttribute::Size size) noexcept { - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(enabled ? 1 : 0); - attribute.buffer.Assign(buffer); - attribute.offset.Assign(offset); - attribute.type.Assign(static_cast<u32>(type)); - attribute.size.Assign(static_cast<u32>(size)); + constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); } }; - struct Rasterizer { - union { - u32 raw; - BitField<0, 4, u32> topology; - BitField<4, 1, u32> primitive_restart_enable; - BitField<5, 1, u32> cull_enable; - BitField<6, 1, u32> depth_bias_enable; - BitField<7, 1, u32> depth_clamp_disabled; - BitField<8, 1, u32> ndc_minus_one_to_one; - BitField<9, 2, u32> cull_face; - BitField<11, 1, u32> front_face; - BitField<12, 2, u32> polygon_mode; - BitField<14, 5, u32> patch_control_points_minus_one; - BitField<19, 2, u32> tessellation_primitive; - BitField<21, 2, u32> tessellation_spacing; - BitField<23, 1, u32> tessellation_clockwise; - BitField<24, 1, u32> logic_op_enable; - BitField<25, 4, u32> logic_op; - BitField<29, 1, u32> rasterize_enable; - }; - - // TODO(Rodrigo): Move this to push constants - u32 point_size; + template <std::size_t Position> + union StencilFace { + BitField<Position + 0, 3, u32> action_stencil_fail; + BitField<Position + 3, 3, u32> action_depth_fail; + BitField<Position + 6, 3, u32> action_depth_pass; + BitField<Position + 9, 3, u32> test_func; - void Fill(const Maxwell& regs) noexcept; + Maxwell::StencilOp ActionStencilFail() const noexcept { + return UnpackStencilOp(action_stencil_fail); + } - constexpr Maxwell::PrimitiveTopology Topology() const noexcept { - return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); + Maxwell::StencilOp ActionDepthFail() const noexcept { + return UnpackStencilOp(action_depth_fail); } - Maxwell::CullFace CullFace() const noexcept { - return UnpackCullFace(cull_face.Value()); + Maxwell::StencilOp ActionDepthPass() const noexcept { + return UnpackStencilOp(action_depth_pass); } - Maxwell::FrontFace FrontFace() const noexcept { - return UnpackFrontFace(front_face.Value()); + Maxwell::ComparisonOp TestFunc() const noexcept { + return UnpackComparisonOp(test_func); } }; - struct DepthStencil { - template <std::size_t Position> - union StencilFace { - BitField<Position + 0, 3, u32> action_stencil_fail; - BitField<Position + 3, 3, u32> action_depth_fail; - BitField<Position + 6, 3, u32> action_depth_pass; - BitField<Position + 9, 3, u32> test_func; - - Maxwell::StencilOp ActionStencilFail() const noexcept { - return UnpackStencilOp(action_stencil_fail); - } - - Maxwell::StencilOp ActionDepthFail() const noexcept { - return UnpackStencilOp(action_depth_fail); - } - - Maxwell::StencilOp ActionDepthPass() const noexcept { - return UnpackStencilOp(action_depth_pass); - } - - Maxwell::ComparisonOp TestFunc() const noexcept { - return UnpackComparisonOp(test_func); - } - }; + union VertexBinding { + u16 raw; + BitField<0, 12, u16> stride; + BitField<12, 1, u16> enabled; + }; + struct DynamicState { union { - u32 raw; + u32 raw1; StencilFace<0> front; StencilFace<12> back; - BitField<24, 1, u32> depth_test_enable; + BitField<24, 1, u32> stencil_enable; BitField<25, 1, u32> depth_write_enable; BitField<26, 1, u32> depth_bounds_enable; - BitField<27, 1, u32> stencil_enable; - BitField<28, 3, u32> depth_test_func; + BitField<27, 1, u32> depth_test_enable; + BitField<28, 1, u32> front_face; + BitField<29, 3, u32> depth_test_func; + }; + union { + u32 raw2; + BitField<0, 4, u32> topology; + BitField<4, 2, u32> cull_face; + BitField<6, 1, u32> cull_enable; }; + std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; - void Fill(const Maxwell& regs) noexcept; + void Fill(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { return UnpackComparisonOp(depth_test_func); } - }; - - struct ColorBlending { - std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; - void Fill(const Maxwell& regs) noexcept; - }; + Maxwell::CullFace CullFace() const noexcept { + return UnpackCullFace(cull_face.Value()); + } - struct ViewportSwizzles { - std::array<u16, Maxwell::NumViewports> swizzles; + Maxwell::FrontFace FrontFace() const noexcept { + return UnpackFrontFace(front_face.Value()); + } - void Fill(const Maxwell& regs) noexcept; + constexpr Maxwell::PrimitiveTopology Topology() const noexcept { + return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); + } }; - VertexInput vertex_input; - Rasterizer rasterizer; - DepthStencil depth_stencil; - ColorBlending color_blending; - ViewportSwizzles viewport_swizzles; + union { + u32 raw; + BitField<0, 1, u32> no_extended_dynamic_state; + BitField<2, 1, u32> primitive_restart_enable; + BitField<3, 1, u32> depth_bias_enable; + BitField<4, 1, u32> depth_clamp_disabled; + BitField<5, 1, u32> ndc_minus_one_to_one; + BitField<6, 2, u32> polygon_mode; + BitField<8, 5, u32> patch_control_points_minus_one; + BitField<13, 2, u32> tessellation_primitive; + BitField<15, 2, u32> tessellation_spacing; + BitField<17, 1, u32> tessellation_clockwise; + BitField<18, 1, u32> logic_op_enable; + BitField<19, 4, u32> logic_op; + BitField<23, 1, u32> rasterize_enable; + }; + u32 point_size; + std::array<u32, Maxwell::NumVertexArrays> binding_divisors; + std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; + std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; + std::array<u16, Maxwell::NumViewports> viewport_swizzles; + DynamicState dynamic_state; - void Fill(const Maxwell& regs); + void Fill(const Maxwell& regs, bool has_extended_dynamic_state); std::size_t Hash() const noexcept; @@ -255,6 +207,11 @@ struct FixedPipelineState { bool operator!=(const FixedPipelineState& rhs) const noexcept { return !operator==(rhs); } + + std::size_t Size() const noexcept { + const std::size_t total_size = sizeof *this; + return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); + } }; static_assert(std::has_unique_object_representations_v<FixedPipelineState>); static_assert(std::is_trivially_copyable_v<FixedPipelineState>); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 2871035f5..d7f1ae89f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -21,29 +21,29 @@ namespace Sampler { VkFilter Filter(Tegra::Texture::TextureFilter filter) { switch (filter) { - case Tegra::Texture::TextureFilter::Linear: - return VK_FILTER_LINEAR; case Tegra::Texture::TextureFilter::Nearest: return VK_FILTER_NEAREST; + case Tegra::Texture::TextureFilter::Linear: + return VK_FILTER_LINEAR; } - UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); + UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter)); return {}; } VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { switch (mipmap_filter) { case Tegra::Texture::TextureMipmapFilter::None: - // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping - // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to - // use an image view with a single mipmap level to emulate this. - return VK_SAMPLER_MIPMAP_MODE_LINEAR; - ; - case Tegra::Texture::TextureMipmapFilter::Linear: - return VK_SAMPLER_MIPMAP_MODE_LINEAR; + // There are no Vulkan filter modes that directly correspond to OpenGL minification filters + // of GL_LINEAR or GL_NEAREST, but they can be emulated using + // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter = + // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively. + return VK_SAMPLER_MIPMAP_MODE_NEAREST; case Tegra::Texture::TextureMipmapFilter::Nearest: return VK_SAMPLER_MIPMAP_MODE_NEAREST; + case Tegra::Texture::TextureMipmapFilter::Linear: + return VK_SAMPLER_MIPMAP_MODE_LINEAR; } - UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); + UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); return {}; } @@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - default: - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); - return {}; } + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; } VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { @@ -149,7 +148,7 @@ struct FormatTuple { {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U {VK_FORMAT_UNDEFINED}, // R16S - {VK_FORMAT_UNDEFINED}, // R16UI + {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI {VK_FORMAT_UNDEFINED}, // R16I {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F @@ -288,14 +287,35 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::Patches: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; - default: - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); - return {}; } + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; } VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return VK_FORMAT_R8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8: + return VK_FORMAT_R8G8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return VK_FORMAT_R8G8B8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case Maxwell::VertexAttribute::Size::Size_16: + return VK_FORMAT_R16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16: + return VK_FORMAT_R16G16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return VK_FORMAT_R16G16B16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UNORM; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + } + break; case Maxwell::VertexAttribute::Type::SignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: @@ -316,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; - default: - break; } break; - case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_UNORM; + return VK_FORMAT_R8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_UNORM; + return VK_FORMAT_R8G8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_UNORM; + return VK_FORMAT_R8G8B8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_UNORM; + return VK_FORMAT_R8G8B8A8_USCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_UNORM; + return VK_FORMAT_R16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_UNORM; + return VK_FORMAT_R16G16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_UNORM; + return VK_FORMAT_R16G16B16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_UNORM; + return VK_FORMAT_R16G16B16A16_USCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return VK_FORMAT_A2B10G10R10_UNORM_PACK32; - default: - break; + return VK_FORMAT_A2B10G10R10_USCALED_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedInt: + case Maxwell::VertexAttribute::Type::SignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SINT; + return VK_FORMAT_R8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SINT; + return VK_FORMAT_R8G8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SINT; + return VK_FORMAT_R8G8B8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SINT; + return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SINT; + return VK_FORMAT_R16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SINT; + return VK_FORMAT_R16G16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SINT; + return VK_FORMAT_R16G16B16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SINT; - case Maxwell::VertexAttribute::Size::Size_32: - return VK_FORMAT_R32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32: - return VK_FORMAT_R32G32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return VK_FORMAT_R32G32B32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return VK_FORMAT_R32G32B32A32_SINT; - default: - break; + return VK_FORMAT_R16G16B16A16_SSCALED; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; } break; case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -400,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_UINT; - default: - break; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_USCALED; + return VK_FORMAT_R8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_USCALED; + return VK_FORMAT_R8G8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_USCALED; + return VK_FORMAT_R8G8B8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_USCALED; + return VK_FORMAT_R8G8B8A8_SINT; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_USCALED; + return VK_FORMAT_R16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_USCALED; + return VK_FORMAT_R16G16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_USCALED; + return VK_FORMAT_R16G16B16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_USCALED; - default: - break; + return VK_FORMAT_R16G16B16A16_SINT; + case Maxwell::VertexAttribute::Size::Size_32: + return VK_FORMAT_R32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32: + return VK_FORMAT_R32G32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return VK_FORMAT_R32G32B32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return VK_FORMAT_R32G32B32A32_SINT; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: switch (size) { - case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SSCALED; + return VK_FORMAT_R16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SSCALED; + return VK_FORMAT_R16G16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SSCALED; + return VK_FORMAT_R16G16B16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SSCALED; - default: - break; - } - break; - case Maxwell::VertexAttribute::Type::Float: - switch (size) { + return VK_FORMAT_R16G16B16A16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32: return VK_FORMAT_R32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32: @@ -458,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SFLOAT; - default: - break; } break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 59b441943..2258479f5 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -13,6 +13,7 @@ #include <fmt/format.h> #include "common/dynamic_library.h" +#include "common/file_util.h" #include "common/logging/log.h" #include "common/telemetry.h" #include "core/core.h" @@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() { char* libvulkan_env = getenv("LIBVULKAN_PATH"); if (!libvulkan_env || !library.Open(libvulkan_env)) { // Use the libvulkan.dylib from the application bundle. - std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + const std::string filename = + FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; library.Open(filename.c_str()); } #else @@ -153,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc } } - static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; - vk::Span<const char*> layers = layers_data; - if (!enable_layers) { - layers = {}; + std::vector<const char*> layers; + layers.reserve(1); + if (enable_layers) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } + + const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); + if (!layer_properties) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); + layers.clear(); + } + + for (auto layer_it = layers.begin(); layer_it != layers.end();) { + const char* const layer = *layer_it; + const auto it = std::find_if( + layer_properties->begin(), layer_properties->end(), + [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); + if (it == layer_properties->end()) { + LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); + layer_it = layers.erase(layer_it); + } else { + ++layer_it; + } } + vk::Instance instance = vk::Instance::Create(layers, extensions, dld); if (!instance) { LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); @@ -387,7 +409,7 @@ bool RendererVulkan::PickDevices() { return false; } - const s32 device_index = Settings::values.vulkan_device; + const s32 device_index = Settings::values.vulkan_device.GetValue(); if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); return false; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5f33d9e40..2be38d419 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch } // Anonymous namespace -CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { VkBufferCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; ci.pNext = nullptr; @@ -54,46 +54,17 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me buffer.commit = memory_manager.Commit(buffer.handle, false); } -CachedBufferBlock::~CachedBufferBlock() = default; +Buffer::~Buffer() = default; -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, - device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ - staging_pool} {} - -VKBufferCache::~VKBufferCache() = default; - -Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); -} - -VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { - return buffer->GetHandle(); -} - -VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { - size = std::max(size, std::size_t(4)); - const auto& empty = staging_pool.GetUnusedBuffer(size, false); - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, size, 0); - }); - return *empty.handle; -} - -void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, - size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); + + const VkBuffer handle = Handle(); + scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -102,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; + barrier.buffer = handle; barrier.offset = offset; barrier.size = size; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, @@ -110,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st }); } -void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, - size](vk::CommandBuffer cmdbuf) { + + const VkBuffer handle = Handle(); + scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; @@ -123,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; + barrier.buffer = handle; barrier.offset = offset; barrier.size = size; @@ -131,18 +102,20 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); - cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); + cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); }); scheduler.Finish(); std::memcpy(data, staging.commit->Map(size), size); } -void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, - dst_offset, size](vk::CommandBuffer cmdbuf) { + + const VkBuffer dst_buffer = Handle(); + scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, + size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); std::array<VkBufferMemoryBarrier, 2> barriers; @@ -169,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t }); } +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} + +VKBufferCache::~VKBufferCache() = default; + +std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr, + size); +} + +VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, size, 0); + }); + return {*empty.handle, 0, 0}; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index a54583e7d..991ee451c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -8,7 +8,6 @@ #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/rasterizer_cache.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -24,22 +23,34 @@ class VKDevice; class VKMemoryManager; class VKScheduler; -class CachedBufferBlock final : public VideoCommon::BufferBlock { +class Buffer final : public VideoCommon::BufferBlock { public: - explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - VAddr cpu_addr, std::size_t size); - ~CachedBufferBlock(); + explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); + ~Buffer(); - VkBuffer GetHandle() const { + void Upload(std::size_t offset, std::size_t size, const u8* data); + + void Download(std::size_t offset, std::size_t size, u8* data); + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size); + + VkBuffer Handle() const { return *buffer.handle; } + u64 Address() const { + return 0; + } + private: + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; + VKBuffer buffer; }; -using Buffer = std::shared_ptr<CachedBufferBlock>; - class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, @@ -47,21 +58,10 @@ public: VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - VkBuffer GetEmptyBuffer(std::size_t size) override; + BufferInfo GetEmptyBuffer(std::size_t size) override; protected: - VkBuffer ToHandle(const Buffer& buffer) override; - - Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; + std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; private: const VKDevice& device; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e1b46277..281bf9ac3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { }; add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); VkDescriptorSetLayoutCreateInfo ci; diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 890fd52cf..9259b618d 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; VkDescriptorPoolCreateInfo ci; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 750e5a0ca..fdaea4210 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { - static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, - VK_FORMAT_A8B8G8R8_UINT_PACK32, - VK_FORMAT_A8B8G8R8_SNORM_PACK32, - VK_FORMAT_A8B8G8R8_SRGB_PACK32, - VK_FORMAT_B5G6R5_UNORM_PACK16, - VK_FORMAT_A2B10G10R10_UNORM_PACK32, - VK_FORMAT_A1R5G5B5_UNORM_PACK16, - VK_FORMAT_R32G32B32A32_SFLOAT, - VK_FORMAT_R32G32B32A32_UINT, - VK_FORMAT_R32G32_SFLOAT, - VK_FORMAT_R32G32_UINT, - VK_FORMAT_R16G16B16A16_UINT, - VK_FORMAT_R16G16B16A16_SNORM, - VK_FORMAT_R16G16B16A16_UNORM, - VK_FORMAT_R16G16_UNORM, - VK_FORMAT_R16G16_SNORM, - VK_FORMAT_R16G16_SFLOAT, - VK_FORMAT_R16_UNORM, - VK_FORMAT_R8G8B8A8_SRGB, - VK_FORMAT_R8G8_UNORM, - VK_FORMAT_R8G8_SNORM, - VK_FORMAT_R8G8_UINT, - VK_FORMAT_R8_UNORM, - VK_FORMAT_R8_UINT, - VK_FORMAT_B10G11R11_UFLOAT_PACK32, - VK_FORMAT_R32_SFLOAT, - VK_FORMAT_R32_UINT, - VK_FORMAT_R32_SINT, - VK_FORMAT_R16_SFLOAT, - VK_FORMAT_R16G16B16A16_SFLOAT, - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_B8G8R8A8_SRGB, - VK_FORMAT_R4G4B4A4_UNORM_PACK16, - VK_FORMAT_D32_SFLOAT, - VK_FORMAT_D16_UNORM, - VK_FORMAT_D16_UNORM_S8_UINT, - VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_D32_SFLOAT_S8_UINT, - VK_FORMAT_BC1_RGBA_UNORM_BLOCK, - VK_FORMAT_BC2_UNORM_BLOCK, - VK_FORMAT_BC3_UNORM_BLOCK, - VK_FORMAT_BC4_UNORM_BLOCK, - VK_FORMAT_BC5_UNORM_BLOCK, - VK_FORMAT_BC5_SNORM_BLOCK, - VK_FORMAT_BC7_UNORM_BLOCK, - VK_FORMAT_BC6H_UFLOAT_BLOCK, - VK_FORMAT_BC6H_SFLOAT_BLOCK, - VK_FORMAT_BC1_RGBA_SRGB_BLOCK, - VK_FORMAT_BC2_SRGB_BLOCK, - VK_FORMAT_BC3_SRGB_BLOCK, - VK_FORMAT_BC7_SRGB_BLOCK, - VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK, - VK_FORMAT_ASTC_5x4_SRGB_BLOCK, - VK_FORMAT_ASTC_5x5_UNORM_BLOCK, - VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK, - VK_FORMAT_ASTC_6x6_UNORM_BLOCK, - VK_FORMAT_ASTC_6x6_SRGB_BLOCK, - VK_FORMAT_ASTC_10x10_UNORM_BLOCK, - VK_FORMAT_ASTC_10x10_SRGB_BLOCK, - VK_FORMAT_ASTC_12x12_UNORM_BLOCK, - VK_FORMAT_ASTC_12x12_SRGB_BLOCK, - VK_FORMAT_ASTC_8x6_UNORM_BLOCK, - VK_FORMAT_ASTC_8x6_SRGB_BLOCK, - VK_FORMAT_ASTC_6x5_UNORM_BLOCK, - VK_FORMAT_ASTC_6x5_SRGB_BLOCK, - VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; + static constexpr std::array formats{ + VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_UINT, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_UINT, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_UINT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_D16_UNORM, + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + }; std::unordered_map<VkFormat, VkFormatProperties> format_properties; for (const auto format : formats) { format_properties.emplace(format, physical.GetFormatProperties(format)); @@ -310,6 +313,16 @@ bool VKDevice::Create() { LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); } + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; + if (ext_extended_dynamic_state) { + dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + dynamic_state.pNext = nullptr; + dynamic_state.extendedDynamicState = VK_TRUE; + SetNext(next, dynamic_state); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -538,6 +551,7 @@ std::vector<const char*> VKDevice::LoadExtensions() { bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; + bool has_ext_extended_dynamic_state{}; for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) { Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); Test(extension, khr_uniform_buffer_standard_layout, @@ -555,6 +569,8 @@ std::vector<const char*> VKDevice::LoadExtensions() { false); Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); + Test(extension, has_ext_extended_dynamic_state, + VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { Test(extension, nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -640,6 +656,19 @@ std::vector<const char*> VKDevice::LoadExtensions() { } } + if (has_ext_extended_dynamic_state) { + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + dynamic_state.pNext = nullptr; + features.pNext = &dynamic_state; + physical.GetFeatures2KHR(features); + + if (dynamic_state.extendedDynamicState) { + extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + ext_extended_dynamic_state = true; + } + } + return extensions; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6b9227b09..ae5c21baa 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -182,6 +182,11 @@ public: return ext_custom_border_color; } + /// Returns true if the device supports VK_EXT_extended_dynamic_state. + bool IsExtExtendedDynamicStateSupported() const { + return ext_extended_dynamic_state; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -239,6 +244,7 @@ private: bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. // Telemetry parameters diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 69b6bba00..844445105 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -176,20 +176,32 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { - const auto& vi = fixed_state.vertex_input; - const auto& ds = fixed_state.depth_stencil; - const auto& cd = fixed_state.color_blending; - const auto& rs = fixed_state.rasterizer; - const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles; + const auto& state = fixed_state; + const auto& viewport_swizzles = state.viewport_swizzles; + + FixedPipelineState::DynamicState dynamic; + if (device.IsExtExtendedDynamicStateSupported()) { + // Insert dummy values, as long as they are valid they don't matter as extended dynamic + // state is ignored + dynamic.raw1 = 0; + dynamic.raw2 = 0; + for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { + // Enable all vertex bindings + binding.raw = 0; + binding.enabled.Assign(1); + } + } else { + dynamic = state.dynamic_state; + } std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; - for (std::size_t index = 0; index < std::size(vi.bindings); ++index) { - const auto& binding = vi.bindings[index]; + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const auto& binding = dynamic.vertex_bindings[index]; if (!binding.enabled) { continue; } - const bool instanced = vi.binding_divisors[index] != 0; + const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; auto& vertex_binding = vertex_bindings.emplace_back(); @@ -200,14 +212,14 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa if (instanced) { auto& binding_divisor = vertex_binding_divisors.emplace_back(); binding_divisor.binding = static_cast<u32>(index); - binding_divisor.divisor = vi.binding_divisors[index]; + binding_divisor.divisor = state.binding_divisors[index]; } } std::vector<VkVertexInputAttributeDescription> vertex_attributes; const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < std::size(vi.attributes); ++index) { - const auto& attribute = vi.attributes[index]; + for (std::size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; if (!attribute.enabled) { continue; } @@ -244,15 +256,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; input_assembly_ci.pNext = nullptr; input_assembly_ci.flags = 0; - input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); + input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); input_assembly_ci.primitiveRestartEnable = - rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); + state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); VkPipelineTessellationStateCreateInfo tessellation_ci; tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; tessellation_ci.pNext = nullptr; tessellation_ci.flags = 0; - tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; + tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1; VkPipelineViewportStateCreateInfo viewport_ci; viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -280,13 +292,13 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rasterization_ci.pNext = nullptr; rasterization_ci.flags = 0; - rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; + rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; + rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; rasterization_ci.cullMode = - rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; - rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); - rasterization_ci.depthBiasEnable = rs.depth_bias_enable; + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE; + rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()); + rasterization_ci.depthBiasEnable = state.depth_bias_enable; rasterization_ci.depthBiasConstantFactor = 0.0f; rasterization_ci.depthBiasClamp = 0.0f; rasterization_ci.depthBiasSlopeFactor = 0.0f; @@ -307,14 +319,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; depth_stencil_ci.pNext = nullptr; depth_stencil_ci.flags = 0; - depth_stencil_ci.depthTestEnable = ds.depth_test_enable; - depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; - depth_stencil_ci.depthCompareOp = - ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS; - depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; - depth_stencil_ci.stencilTestEnable = ds.stencil_enable; - depth_stencil_ci.front = GetStencilFaceState(ds.front); - depth_stencil_ci.back = GetStencilFaceState(ds.back); + depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable; + depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable; + depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS; + depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable; + depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable; + depth_stencil_ci.front = GetStencilFaceState(dynamic.front); + depth_stencil_ci.back = GetStencilFaceState(dynamic.back); depth_stencil_ci.minDepthBounds = 0.0f; depth_stencil_ci.maxDepthBounds = 0.0f; @@ -324,7 +337,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa static constexpr std::array COMPONENT_TABLE = { VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT}; - const auto& blend = cd.attachments[index]; + const auto& blend = state.attachments[index]; VkColorComponentFlags color_components = 0; for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { @@ -354,11 +367,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa color_blend_ci.pAttachments = cb_attachments.data(); std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); - static constexpr std::array dynamic_states = { + std::vector dynamic_states = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended = { + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } VkPipelineDynamicStateCreateInfo dynamic_state_ci; dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5c7b7945..3da835324 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -27,6 +27,7 @@ #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" +#include "video_core/shader_cache.h" namespace Vulkan { @@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; +constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ @@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, u32 binding = base_binding; AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); - AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); + AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); + AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); return binding; } @@ -113,12 +116,12 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, } // Anonymous namespace std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); return static_cast<std::size_t>(hash); } bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; + return std::memcmp(&rhs, this, Size()) == 0; } std::size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, - u32 main_offset) - : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, +Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + VideoCommon::Shader::ProgramCode program_code, u32 main_offset) + : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} -CachedShader::~CachedShader() = default; +Shader::~Shader() = default; -Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( - Core::System& system, Tegra::Engines::ShaderType stage) { - if (stage == Tegra::Engines::ShaderType::Compute) { +Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, + Tegra::Engines::ShaderType stage) { + if (stage == ShaderType::Compute) { return system.GPU().KeplerCompute(); } else { return system.GPU().Maxwell3D(); @@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache) - : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, - descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, - renderpass_cache{renderpass_cache} {} + : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, + scheduler{scheduler}, descriptor_pool{descriptor_pool}, + update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} VKPipelineCache::~VKPipelineCache() = default; -std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { +std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { const auto& gpu = system.GPU().Maxwell3D(); - std::array<Shader, Maxwell::MaxShaderProgram> shaders; + std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { const GPUVAddr program_addr{GetShaderAddress(system, program)}; const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); ASSERT(cpu_addr); - auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; - if (!shader) { + + Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); + if (!result) { const auto host_ptr{memory_manager.GetPointer(program_addr)}; // No shader found - create a new one constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); + const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + + auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), + stage_offset); + result = shader.get(); - shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, - std::move(code), stage_offset); if (cpu_addr) { - Register(shader); + Register(std::move(shader), *cpu_addr, size_in_bytes); } else { - null_shader = shader; + null_shader = std::move(shader); } } - shaders[index] = std::move(shader); + shaders[index] = result; } return last_shaders = shaders; } @@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); ASSERT(cpu_addr); - auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; + Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); if (!shader) { // No shader found - create a new one const auto host_ptr = memory_manager.GetPointer(program_addr); ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); - shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, - program_addr, *cpu_addr, std::move(code), - KERNEL_MAIN_OFFSET); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + + auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, + std::move(code), KERNEL_MAIN_OFFSET); + shader = shader_info.get(); + if (cpu_addr) { - Register(shader); + Register(std::move(shader_info), *cpu_addr, size_in_bytes); } else { - null_kernel = shader; + null_kernel = std::move(shader_info); } } @@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } -void VKPipelineCache::Unregister(const Shader& shader) { +void VKPipelineCache::OnShaderRemoval(Shader* shader) { bool finished = false; const auto Finish = [&] { // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and @@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) { Finish(); it = compute_cache.erase(it); } - - RasterizerCache::Unregister(shader); } std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> @@ -305,16 +312,19 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { + if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || + device.IsExtExtendedDynamicStateSupported()) { float point_size; - std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float)); + std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); specialization.point_size = point_size; ASSERT(point_size != 0.0f); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); + const auto& attribute = fixed_state.attributes[i]; + specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; + specialization.attribute_types[i] = attribute.Type(); } - specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; + specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; SPIRVProgram program; std::vector<VkDescriptorSetLayoutBinding> bindings; @@ -328,12 +338,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { } const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; - ASSERT(shader); + const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const auto program_type = GetShaderType(program_enum); + const ShaderType program_type = GetShaderType(program_enum); const auto& entries = shader->GetEntries(); program[stage] = { Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), @@ -375,16 +384,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 return; } - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple uniform texels at once causes the driver to - // crash. + if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || + descriptor_type == STORAGE_TEXEL_BUFFER) { + // Nvidia has a bug where updating multiple texels at once causes the driver to crash. + // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); entry.dstBinding = binding + i; entry.dstArrayElement = 0; entry.descriptorCount = 1; entry.descriptorType = descriptor_type; - entry.offset = offset + i * entry_size; + entry.offset = static_cast<std::size_t>(offset + i * entry_size); entry.stride = entry_size; } } else if (count > 0) { @@ -405,8 +415,9 @@ void FillDescriptorUpdateTemplateEntries( std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); - AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); + AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); + AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0b5796fef..0a3fe65fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -17,7 +17,6 @@ #include "common/common_types.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_cache.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" @@ -26,6 +25,7 @@ #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader_cache.h" namespace Core { class System; @@ -41,15 +41,13 @@ class VKFence; class VKScheduler; class VKUpdateDescriptorQueue; -class CachedShader; -using Shader = std::shared_ptr<CachedShader>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsPipelineCacheKey { - FixedPipelineState fixed_state; RenderPassParams renderpass_params; + u32 padding; std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - u64 padding; // This is necessary for unique object representations + FixedPipelineState fixed_state; std::size_t Hash() const noexcept; @@ -58,6 +56,10 @@ struct GraphicsPipelineCacheKey { bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { return !operator==(rhs); } + + std::size_t Size() const noexcept { + return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + } }; static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); @@ -102,21 +104,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> { namespace Vulkan { -class CachedShader final : public RasterizerCacheObject { +class Shader { public: - explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, - u32 main_offset); - ~CachedShader(); + explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + VideoCommon::Shader::ProgramCode program_code, u32 main_offset); + ~Shader(); GPUVAddr GetGpuAddr() const { return gpu_addr; } - std::size_t GetSizeInBytes() const override { - return program_code.size() * sizeof(u64); - } - VideoCommon::Shader::ShaderIR& GetIR() { return shader_ir; } @@ -144,25 +141,23 @@ private: ShaderEntries entries; }; -class VKPipelineCache final : public RasterizerCache<Shader> { +class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { public: explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache); - ~VKPipelineCache(); + ~VKPipelineCache() override; - std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); + std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: - void Unregister(const Shader& shader) override; - - void FlushObjectInner(const Shader& object) override {} + void OnShaderRemoval(Shader* shader) final; private: std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( @@ -175,10 +170,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; VKRenderPassCache& renderpass_cache; - Shader null_shader{}; - Shader null_kernel{}; + std::unique_ptr<Shader> null_shader; + std::unique_ptr<Shader> null_kernel; - std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; + std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; GraphicsPipelineCacheKey last_graphics_key; VKGraphicsPipeline* last_graphics_pipeline = nullptr; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index be5b77fae..380ed532b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -38,6 +38,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader_cache.h" namespace Vulkan { @@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { } std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( - const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { + const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; for (std::size_t i = 0; i < std::size(addresses); ++i) { addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; @@ -117,6 +118,17 @@ template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, std::size_t stage, std::size_t index = 0) { const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); + if constexpr (std::is_same_v<Entry, SamplerEntry>) { + if (entry.is_separated) { + const u32 buffer_1 = entry.buffer; + const u32 buffer_2 = entry.secondary_buffer; + const u32 offset_1 = entry.offset; + const u32 offset_2 = entry.secondary_offset; + const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); + return engine.GetTextureInfo(handle_1 | handle_2); + } + } if (entry.is_bindless) { const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); return engine.GetTextureInfo(tex_handle); @@ -131,13 +143,65 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry } } +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { + if (!is_clear) { + return true; + } + // First we have to make sure all clear masks are enabled. + if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || + !regs.clear_buffers.A) { + return true; + } + // If scissors are disabled, the whole screen is cleared + if (!regs.clear_flags.scissor) { + return false; + } + // Then we have to confirm scissor testing clears the whole image + const std::size_t index = regs.clear_buffers.RT; + const auto& scissor = regs.scissor_test[0]; + return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || + scissor.max_y < regs.rt[index].height; +} + +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { + // If we are not clearing, the contents have to be preserved + if (!is_clear) { + return true; + } + // For depth stencil clears we only have to confirm scissor test covers the whole image + if (!regs.clear_flags.scissor) { + return false; + } + // Make sure the clear cover the whole image + const auto& scissor = regs.scissor_test[0]; + return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || + scissor.max_y < regs.zeta_height; +} + +template <std::size_t N> +std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { + std::array<VkDeviceSize, N> expanded; + std::copy(strides.begin(), strides.end(), expanded.begin()); + return expanded; +} + } // Anonymous namespace class BufferBindings final { public: - void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) { + void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { vertex.buffers[vertex.num_buffers] = buffer; vertex.offsets[vertex.num_buffers] = offset; + vertex.sizes[vertex.num_buffers] = size; + vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); ++vertex.num_buffers; } @@ -147,76 +211,76 @@ public: index.type = type; } - void Bind(VKScheduler& scheduler) const { + void Bind(const VKDevice& device, VKScheduler& scheduler) const { // Use this large switch case to avoid dispatching more memory in the record lambda than // what we need. It looks horrible, but it's the best we can do on standard C++. switch (vertex.num_buffers) { case 0: - return BindStatic<0>(scheduler); + return BindStatic<0>(device, scheduler); case 1: - return BindStatic<1>(scheduler); + return BindStatic<1>(device, scheduler); case 2: - return BindStatic<2>(scheduler); + return BindStatic<2>(device, scheduler); case 3: - return BindStatic<3>(scheduler); + return BindStatic<3>(device, scheduler); case 4: - return BindStatic<4>(scheduler); + return BindStatic<4>(device, scheduler); case 5: - return BindStatic<5>(scheduler); + return BindStatic<5>(device, scheduler); case 6: - return BindStatic<6>(scheduler); + return BindStatic<6>(device, scheduler); case 7: - return BindStatic<7>(scheduler); + return BindStatic<7>(device, scheduler); case 8: - return BindStatic<8>(scheduler); + return BindStatic<8>(device, scheduler); case 9: - return BindStatic<9>(scheduler); + return BindStatic<9>(device, scheduler); case 10: - return BindStatic<10>(scheduler); + return BindStatic<10>(device, scheduler); case 11: - return BindStatic<11>(scheduler); + return BindStatic<11>(device, scheduler); case 12: - return BindStatic<12>(scheduler); + return BindStatic<12>(device, scheduler); case 13: - return BindStatic<13>(scheduler); + return BindStatic<13>(device, scheduler); case 14: - return BindStatic<14>(scheduler); + return BindStatic<14>(device, scheduler); case 15: - return BindStatic<15>(scheduler); + return BindStatic<15>(device, scheduler); case 16: - return BindStatic<16>(scheduler); + return BindStatic<16>(device, scheduler); case 17: - return BindStatic<17>(scheduler); + return BindStatic<17>(device, scheduler); case 18: - return BindStatic<18>(scheduler); + return BindStatic<18>(device, scheduler); case 19: - return BindStatic<19>(scheduler); + return BindStatic<19>(device, scheduler); case 20: - return BindStatic<20>(scheduler); + return BindStatic<20>(device, scheduler); case 21: - return BindStatic<21>(scheduler); + return BindStatic<21>(device, scheduler); case 22: - return BindStatic<22>(scheduler); + return BindStatic<22>(device, scheduler); case 23: - return BindStatic<23>(scheduler); + return BindStatic<23>(device, scheduler); case 24: - return BindStatic<24>(scheduler); + return BindStatic<24>(device, scheduler); case 25: - return BindStatic<25>(scheduler); + return BindStatic<25>(device, scheduler); case 26: - return BindStatic<26>(scheduler); + return BindStatic<26>(device, scheduler); case 27: - return BindStatic<27>(scheduler); + return BindStatic<27>(device, scheduler); case 28: - return BindStatic<28>(scheduler); + return BindStatic<28>(device, scheduler); case 29: - return BindStatic<29>(scheduler); + return BindStatic<29>(device, scheduler); case 30: - return BindStatic<30>(scheduler); + return BindStatic<30>(device, scheduler); case 31: - return BindStatic<31>(scheduler); + return BindStatic<31>(device, scheduler); case 32: - return BindStatic<32>(scheduler); + return BindStatic<32>(device, scheduler); } UNREACHABLE(); } @@ -227,6 +291,8 @@ private: std::size_t num_buffers = 0; std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; + std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; + std::array<u16, Maxwell::NumVertexArrays> strides; } vertex; struct { @@ -236,15 +302,23 @@ private: } index; template <std::size_t N> - void BindStatic(VKScheduler& scheduler) const { - if (index.buffer) { - BindStatic<N, true>(scheduler); + void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { + if (device.IsExtExtendedDynamicStateSupported()) { + if (index.buffer) { + BindStatic<N, true, true>(scheduler); + } else { + BindStatic<N, false, true>(scheduler); + } } else { - BindStatic<N, false>(scheduler); + if (index.buffer) { + BindStatic<N, true, false>(scheduler); + } else { + BindStatic<N, false, false>(scheduler); + } } } - template <std::size_t N, bool is_indexed> + template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> void BindStatic(VKScheduler& scheduler) const { static_assert(N <= Maxwell::NumVertexArrays); if constexpr (N == 0) { @@ -256,6 +330,31 @@ private: std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); + if constexpr (has_extended_dynamic_state) { + // With extended dynamic states we can specify the length and stride of a vertex buffer + // std::array<VkDeviceSize, N> sizes; + std::array<u16, N> strides; + // std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); + std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); + + if constexpr (is_indexed) { + scheduler.Record( + [buffers, offsets, strides, index = index](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); + cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), + offsets.data(), nullptr, + ExpandStrides(strides).data()); + }); + } else { + scheduler.Record([buffers, offsets, strides](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), + offsets.data(), nullptr, + ExpandStrides(strides).data()); + }); + } + return; + } + if constexpr (is_indexed) { // Indexed draw scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { @@ -314,7 +413,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; - key.fixed_state.Fill(gpu.regs); + key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -332,7 +431,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const Texceptions texceptions = UpdateAttachments(); + const Texceptions texceptions = UpdateAttachments(false); SetupImageTransitions(texceptions, color_attachments, zeta_attachment); key.renderpass_params = GetRenderPassParams(texceptions); @@ -347,7 +446,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { UpdateDynamicStates(); - buffer_bindings.Bind(scheduler); + buffer_bindings.Bind(device, scheduler); BeginTransformFeedback(); @@ -388,7 +487,7 @@ void RasterizerVulkan::Clear() { return; } - [[maybe_unused]] const auto texceptions = UpdateAttachments(); + [[maybe_unused]] const auto texceptions = UpdateAttachments(true); DEBUG_ASSERT(texceptions.none()); SetupImageTransitions(0, color_attachments, zeta_attachment); @@ -468,8 +567,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { const auto& entries = pipeline.GetEntries(); SetupComputeConstBuffers(entries); SetupComputeGlobalBuffers(entries); - SetupComputeTexelBuffers(entries); + SetupComputeUniformTexels(entries); SetupComputeTextures(entries); + SetupComputeStorageTexels(entries); SetupComputeImages(entries); buffer_cache.Unmap(); @@ -664,9 +764,12 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { +RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& dirty = system.GPU().Maxwell3D().dirty.flags; + auto& maxwell3d = system.GPU().Maxwell3D(); + auto& dirty = maxwell3d.dirty.flags; + auto& regs = maxwell3d.regs; + const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -675,7 +778,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { Texceptions texceptions; for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { if (update_rendertargets) { - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); + const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { texceptions[rt] = true; @@ -683,7 +787,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { } if (update_rendertargets) { - zeta_attachment = texture_cache.GetDepthBufferSurface(true); + const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); + zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { texceptions[ZETA_TEXCEPTION_INDEX] = true; @@ -715,7 +820,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( if (!view) { return false; } - key.views.push_back(view->GetHandle()); + key.views.push_back(view->GetAttachment()); key.width = std::min(key.width, view->GetWidth()); key.height = std::min(key.height, view->GetHeight()); key.layers = std::min(key.layers, view->GetNumLayers()); @@ -761,7 +866,7 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt const auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; - SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); + SetupVertexArrays(buffer_bindings); const u32 base_instance = regs.vb_base_instance; const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; @@ -775,20 +880,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt } void RasterizerVulkan::SetupShaderDescriptors( - const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { + const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { texture_cache.GuardSamplers(true); for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { // Skip VertexA stage - const auto& shader = shaders[stage + 1]; + Shader* const shader = shaders[stage + 1]; if (!shader) { continue; } const auto& entries = shader->GetEntries(); SetupGraphicsConstBuffers(entries, stage); SetupGraphicsGlobalBuffers(entries, stage); - SetupGraphicsTexelBuffers(entries, stage); + SetupGraphicsUniformTexels(entries, stage); SetupGraphicsTextures(entries, stage); + SetupGraphicsStorageTexels(entries, stage); SetupGraphicsImages(entries, stage); } texture_cache.GuardSamplers(false); @@ -831,6 +937,17 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + if (device.IsExtExtendedDynamicStateSupported()) { + UpdateCullMode(regs); + UpdateDepthBoundsTestEnable(regs); + UpdateDepthTestEnable(regs); + UpdateDepthWriteEnable(regs); + UpdateDepthCompareOp(regs); + UpdateFrontFace(regs); + UpdatePrimitiveTopology(regs); + UpdateStencilOp(regs); + UpdateStencilTestEnable(regs); + } } void RasterizerVulkan::BeginTransformFeedback() { @@ -838,6 +955,10 @@ void RasterizerVulkan::BeginTransformFeedback() { if (regs.tfb_enabled == 0) { return; } + if (!device.IsExtTransformFeedbackSupported()) { + LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); + return; + } UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || @@ -852,10 +973,10 @@ void RasterizerVulkan::BeginTransformFeedback() { UNIMPLEMENTED_IF(binding.buffer_offset != 0); const GPUVAddr gpu_addr = binding.Address(); - const std::size_t size = binding.buffer_size; - const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); @@ -866,50 +987,33 @@ void RasterizerVulkan::EndTransformFeedback() { if (regs.tfb_enabled == 0) { return; } + if (!device.IsExtTransformFeedbackSupported()) { + return; + } scheduler.Record( [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, - BufferBindings& buffer_bindings) { +void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const auto& regs = system.GPU().Maxwell3D().regs; - for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& attrib = regs.vertex_attrib_format[index]; - if (!attrib.IsValid()) { - vertex_input.SetAttribute(index, false, 0, 0, {}, {}); - continue; - } - - [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; - ASSERT(buffer.IsEnabled()); - - vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), - attrib.size.Value()); - } - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { - vertex_input.SetBinding(index, false, 0, 0); continue; } - vertex_input.SetBinding( - index, true, vertex_array.stride, - regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); - const GPUVAddr start{vertex_array.StartAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; ASSERT(end >= start); - const std::size_t size{end - start}; + const std::size_t size = end - start; if (size == 0) { - buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); + buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); continue; } - const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); - buffer_bindings.AddVertexBinding(buffer, offset); + const auto info = buffer_cache.UploadMemory(start, size); + buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); } } @@ -931,7 +1035,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar break; } const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + VkBuffer buffer = info.handle; + u64 offset = info.offset; std::tie(buffer, offset) = quad_indexed_pass.Assemble( regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); @@ -945,7 +1051,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar break; } const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + VkBuffer buffer = info.handle; + u64 offset = info.offset; auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; @@ -980,12 +1088,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, } } -void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().Maxwell3D(); - for (const auto& entry : entries.texel_buffers) { + for (const auto& entry : entries.uniform_texels) { const auto image = GetTextureInfo(gpu, entry, stage).tic; - SetupTexelBuffer(image, entry); + SetupUniformTexels(image, entry); } } @@ -1000,6 +1108,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: } } +void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().Maxwell3D(); + for (const auto& entry : entries.storage_texels) { + const auto image = GetTextureInfo(gpu, entry, stage).tic; + SetupStorageTexel(image, entry); + } +} + void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); const auto& gpu = system.GPU().Maxwell3D(); @@ -1032,12 +1149,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { } } -void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { +void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().KeplerCompute(); - for (const auto& entry : entries.texel_buffers) { + for (const auto& entry : entries.uniform_texels) { const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; - SetupTexelBuffer(image, entry); + SetupUniformTexels(image, entry); } } @@ -1052,6 +1169,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { } } +void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.storage_texels) { + const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + SetupStorageTexel(image, entry); + } +} + void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); const auto& gpu = system.GPU().KeplerCompute(); @@ -1074,10 +1200,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); ASSERT(size <= MaxConstbufferSize); - const auto [buffer_handle, offset] = + const auto info = buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); - - update_descriptor_queue.AddBuffer(buffer_handle, offset, size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { @@ -1091,18 +1216,18 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // default buffer. static constexpr std::size_t dummy_size = 4; - const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); - update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); + const auto info = buffer_cache.GetEmptyBuffer(dummy_size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); return; } - const auto [buffer, offset] = buffer_cache.UploadMemory( + const auto info = buffer_cache.UploadMemory( actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); - update_descriptor_queue.AddBuffer(buffer, offset, size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } -void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, - const TexelBufferEntry& entry) { +void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, + const UniformTexelEntry& entry) { const auto view = texture_cache.GetTextureSurface(tic, entry); ASSERT(view->IsBufferView()); @@ -1114,16 +1239,24 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu auto view = texture_cache.GetTextureSurface(texture.tic, entry); ASSERT(!view->IsBufferView()); - const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); + const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, + texture.tic.z_source, texture.tic.w_source); const auto sampler = sampler_cache.GetSampler(texture.tsc); update_descriptor_queue.AddSampledImage(sampler, image_view); - const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; sampled_views.push_back(ImageView{std::move(view), image_layout}); } +void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, + const StorageTexelEntry& entry) { + const auto view = texture_cache.GetImageSurface(tic, entry); + ASSERT(view->IsBufferView()); + + update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); +} + void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { auto view = texture_cache.GetImageSurface(tic, entry); @@ -1133,10 +1266,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima UNIMPLEMENTED_IF(tic.IsBuffer()); - const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + const VkImageView image_view = + view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); update_descriptor_queue.AddImage(image_view); - const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); *image_layout = VK_IMAGE_LAYOUT_GENERAL; image_views.push_back(ImageView{std::move(view), image_layout}); } @@ -1231,6 +1365,117 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchCullMode()) { + return; + } + scheduler.Record( + [enabled = regs.cull_test_enabled, cull_face = regs.cull_face](vk::CommandBuffer cmdbuf) { + cmdbuf.SetCullModeEXT(enabled ? MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE); + }); +} + +void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthBoundsTestEnable()) { + return; + } + scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthBoundsTestEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthTestEnable()) { + return; + } + scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthTestEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthWriteEnable()) { + return; + } + scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthWriteEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthCompareOp()) { + return; + } + scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func)); + }); +} + +void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchFrontFace()) { + return; + } + + VkFrontFace front_face = MaxwellToVK::FrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0) { + front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_COUNTER_CLOCKWISE + : VK_FRONT_FACE_CLOCKWISE; + } + scheduler.Record( + [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); }); +} + +void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchPrimitiveTopology()) { + return; + } + const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); + scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); + }); +} + +void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchStencilOp()) { + return; + } + const Maxwell::StencilOp fail = regs.stencil_front_op_fail; + const Maxwell::StencilOp zfail = regs.stencil_front_op_zfail; + const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass; + const Maxwell::ComparisonOp compare = regs.stencil_front_func_func; + if (regs.stencil_two_side_enable) { + scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), + MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), + MaxwellToVK::ComparisonOp(compare)); + }); + } else { + const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail; + const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail; + const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass; + const Maxwell::ComparisonOp back_compare = regs.stencil_back_func_func; + scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass, + back_compare](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail), + MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), + MaxwellToVK::ComparisonOp(compare)); + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail), + MaxwellToVK::StencilOp(back_zpass), + MaxwellToVK::StencilOp(back_zfail), + MaxwellToVK::ComparisonOp(back_compare)); + }); + } +} + +void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchStencilTestEnable()) { + return; + } + scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilTestEnableEXT(enable); + }); +} + std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { std::size_t size = CalculateVertexArraysSize(); if (is_indexed) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 0ed0e48c6..923178b0b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -159,7 +159,10 @@ private: void FlushWork(); - Texceptions UpdateAttachments(); + /// @brief Updates the currently bound attachments + /// @param is_clear True when the framebuffer is updated as a clear + /// @return Bitfield of attachments being used as sampled textures + Texceptions UpdateAttachments(bool is_clear); std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); @@ -168,7 +171,7 @@ private: bool is_indexed, bool is_instanced); /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); + void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); void SetupImageTransitions(Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, @@ -182,8 +185,7 @@ private: bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); - void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, - BufferBindings& buffer_bindings); + void SetupVertexArrays(BufferBindings& buffer_bindings); void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -193,12 +195,15 @@ private: /// Setup global buffers in the graphics pipeline. void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); - /// Setup texel buffers in the graphics pipeline. - void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); + /// Setup uniform texels in the graphics pipeline. + void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); /// Setup textures in the graphics pipeline. void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); + /// Setup storage texels in the graphics pipeline. + void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); + /// Setup images in the graphics pipeline. void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); @@ -209,11 +214,14 @@ private: void SetupComputeGlobalBuffers(const ShaderEntries& entries); /// Setup texel buffers in the compute pipeline. - void SetupComputeTexelBuffers(const ShaderEntries& entries); + void SetupComputeUniformTexels(const ShaderEntries& entries); /// Setup textures in the compute pipeline. void SetupComputeTextures(const ShaderEntries& entries); + /// Setup storage texels in the compute pipeline. + void SetupComputeStorageTexels(const ShaderEntries& entries); + /// Setup images in the compute pipeline. void SetupComputeImages(const ShaderEntries& entries); @@ -222,10 +230,12 @@ private: void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); - void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); + void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); + void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); + void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); @@ -235,6 +245,16 @@ private: void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; std::size_t CalculateComputeStreamBufferSize() const; diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index e6f2fa553..616eacc36 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -9,6 +9,8 @@ #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/textures/texture.h" +using Tegra::Texture::TextureMipmapFilter; + namespace Vulkan { namespace { @@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c ci.maxAnisotropy = tsc.GetMaxAnisotropy(); ci.compareEnable = tsc.depth_compare_enabled; ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.GetMinLod(); - ci.maxLod = tsc.GetMaxLod(); + ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); + ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); ci.unnormalizedCoordinates = VK_FALSE; return device.GetLogical().CreateSampler(ci); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -9,6 +9,7 @@ #include <utility> #include "common/microprofile.h" +#include "common/thread.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { } void VKScheduler::WorkerThread() { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 6f6dedd82..97429cc59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -400,8 +400,9 @@ private: u32 binding = specialization.base_binding; binding = DeclareConstantBuffers(binding); binding = DeclareGlobalBuffers(binding); - binding = DeclareTexelBuffers(binding); + binding = DeclareUniformTexels(binding); binding = DeclareSamplers(binding); + binding = DeclareStorageTexels(binding); binding = DeclareImages(binding); const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); @@ -741,8 +742,10 @@ private: if (!IsGenericAttribute(index)) { continue; } - const u32 location = GetGenericAttributeLocation(index); + if (!IsAttributeEnabled(location)) { + continue; + } const auto type_descriptor = GetAttributeType(location); Id type; if (IsInputAttributeArray()) { @@ -887,7 +890,7 @@ private: return binding; } - u32 DeclareTexelBuffers(u32 binding) { + u32 DeclareUniformTexels(u32 binding) { for (const auto& sampler : ir.GetSamplers()) { if (!sampler.is_buffer) { continue; @@ -908,7 +911,7 @@ private: Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id}); + uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); } return binding; } @@ -943,31 +946,48 @@ private: return binding; } - u32 DeclareImages(u32 binding) { + u32 DeclareStorageTexels(u32 binding) { for (const auto& image : ir.GetImages()) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); + if (image.type != Tegra::Shader::ImageType::TextureBuffer) { + continue; } + DeclareImage(image, binding); + } + return binding; + } - images.emplace(image.index, StorageImage{image_type, id}); + u32 DeclareImages(u32 binding) { + for (const auto& image : ir.GetImages()) { + if (image.type == Tegra::Shader::ImageType::TextureBuffer) { + continue; + } + DeclareImage(image, binding); } return binding; } + void DeclareImage(const Image& image, u32& binding) { + const auto [dim, arrayed] = GetImageDim(image); + constexpr int depth = 0; + constexpr bool ms = false; + constexpr int sampled = 2; // This won't be accessed with a sampler + const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; + const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); + const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); + const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); + AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); + + Decorate(id, spv::Decoration::Binding, binding++); + Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); + if (image.is_read && !image.is_written) { + Decorate(id, spv::Decoration::NonWritable); + } else if (image.is_written && !image.is_read) { + Decorate(id, spv::Decoration::NonReadable); + } + + images.emplace(image.index, StorageImage{image_type, id}); + } + bool IsRenderTargetEnabled(u32 rt) const { for (u32 component = 0; component < 4; ++component) { if (header.ps.IsColorComponentOutputEnabled(rt, component)) { @@ -986,6 +1006,10 @@ private: return stage == ShaderType::TesselationControl; } + bool IsAttributeEnabled(u32 location) const { + return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; + } + u32 GetNumInputVertices() const { switch (stage) { case ShaderType::Geometry: @@ -1201,16 +1225,20 @@ private: UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); return {v_float_zero, Type::Float}; default: - if (IsGenericAttribute(attribute)) { - const u32 location = GetGenericAttributeLocation(attribute); - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; + if (!IsGenericAttribute(attribute)) { + break; } - break; + const u32 location = GetGenericAttributeLocation(attribute); + if (!IsAttributeEnabled(location)) { + // Disabled attributes (also known as constant attributes) always return zero. + return {v_float_zero, Type::Float}; + } + const auto type_descriptor = GetAttributeType(location); + const Type type = type_descriptor.type; + const Id attribute_id = input_attributes.at(attribute); + const std::vector elements = {element}; + const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); + return {OpLoad(GetTypeDefinition(type), pointer), type}; } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); return {v_float_zero, Type::Float}; @@ -1246,7 +1274,7 @@ private: } else { UNREACHABLE_MSG("Unmanaged offset node type"); } - pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index, + pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, buffer_element); } return {OpLoad(t_float, pointer), Type::Float}; @@ -1601,7 +1629,7 @@ private: const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; + return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; } Expression LogicalAssign(Operation operation) { @@ -1664,7 +1692,7 @@ private: const auto& meta = std::get<MetaTexture>(operation.GetMeta()); const u32 index = meta.sampler.index; if (meta.sampler.is_buffer) { - const auto& entry = texel_buffers.at(index); + const auto& entry = uniform_texels.at(index); return OpLoad(entry.image_type, entry.image); } else { const auto& entry = sampled_images.at(index); @@ -1941,39 +1969,20 @@ private: return {}; } - Expression AtomicImageAdd(Operation operation) { - UNIMPLEMENTED(); - return {}; - } - - Expression AtomicImageMin(Operation operation) { - UNIMPLEMENTED(); - return {}; - } - - Expression AtomicImageMax(Operation operation) { - UNIMPLEMENTED(); - return {}; - } - - Expression AtomicImageAnd(Operation operation) { - UNIMPLEMENTED(); - return {}; - } - - Expression AtomicImageOr(Operation operation) { - UNIMPLEMENTED(); - return {}; - } + template <Id (Module::*func)(Id, Id, Id, Id, Id)> + Expression AtomicImage(Operation operation) { + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; + ASSERT(meta.values.size() == 1); - Expression AtomicImageXor(Operation operation) { - UNIMPLEMENTED(); - return {}; - } + const Id coordinate = GetCoordinates(operation, Type::Int); + const Id image = images.at(meta.image.index).image; + const Id sample = v_uint_zero; + const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - Expression AtomicImageExchange(Operation operation) { - UNIMPLEMENTED(); - return {}; + const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); + const Id semantics = v_uint_zero; + const Id value = AsUint(Visit(meta.values[0])); + return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; } template <Id (Module::*func)(Id, Id, Id, Id, Id)> @@ -1988,7 +1997,7 @@ private: return {v_float_zero, Type::Float}; } const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = Constant(t_uint, 0); + const Id semantics = v_uint_zero; const Id value = AsUint(Visit(operation[1])); return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; @@ -2612,11 +2621,11 @@ private: &SPIRVDecompiler::ImageLoad, &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImageAdd, - &SPIRVDecompiler::AtomicImageAnd, - &SPIRVDecompiler::AtomicImageOr, - &SPIRVDecompiler::AtomicImageXor, - &SPIRVDecompiler::AtomicImageExchange, + &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, + &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, + &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, + &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, @@ -2758,8 +2767,11 @@ private: Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); + const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); + const Id v_float_zero = Constant(t_float, 0.0f); const Id v_float_one = Constant(t_float, 1.0f); + const Id v_uint_zero = Constant(t_uint, 0); // Nvidia uses these defaults for varyings (e.g. position and generic attributes) const Id v_varying_default = @@ -2784,15 +2796,16 @@ private: std::unordered_map<u8, GenericVaryingDescription> output_attributes; std::map<u32, Id> constant_buffers; std::map<GlobalMemoryBase, Id> global_buffers; - std::map<u32, TexelBuffer> texel_buffers; + std::map<u32, TexelBuffer> uniform_texels; std::map<u32, SampledImage> sampled_images; + std::map<u32, TexelBuffer> storage_texels; std::map<u32, StorageImage> images; + std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; Id instance_index{}; Id vertex_index{}; Id base_instance{}; Id base_vertex{}; - std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; Id frag_depth{}; Id frag_coord{}; Id front_facing{}; @@ -3048,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { } for (const auto& sampler : ir.GetSamplers()) { if (sampler.is_buffer) { - entries.texel_buffers.emplace_back(sampler); + entries.uniform_texels.emplace_back(sampler); } else { entries.samplers.emplace_back(sampler); } } for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); + if (image.type == Tegra::Shader::ImageType::TextureBuffer) { + entries.storage_texels.emplace_back(image); + } else { + entries.images.emplace_back(image); + } } for (const auto& attribute : ir.GetInputAttributes()) { if (IsGenericAttribute(attribute)) { diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f4c05ac3c..2b0e90396 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -21,8 +21,9 @@ class VKDevice; namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using TexelBufferEntry = VideoCommon::Shader::Sampler; +using UniformTexelEntry = VideoCommon::Shader::Sampler; using SamplerEntry = VideoCommon::Shader::Sampler; +using StorageTexelEntry = VideoCommon::Shader::Image; using ImageEntry = VideoCommon::Shader::Image; constexpr u32 DESCRIPTOR_SET = 0; @@ -66,13 +67,15 @@ private: struct ShaderEntries { u32 NumBindings() const { return static_cast<u32>(const_buffers.size() + global_buffers.size() + - texel_buffers.size() + samplers.size() + images.size()); + uniform_texels.size() + samplers.size() + storage_texels.size() + + images.size()); } std::vector<ConstBufferEntry> const_buffers; std::vector<GlobalBufferEntry> global_buffers; - std::vector<TexelBufferEntry> texel_buffers; + std::vector<UniformTexelEntry> uniform_texels; std::vector<SamplerEntry> samplers; + std::vector<StorageTexelEntry> storage_texels; std::vector<ImageEntry> images; std::set<u32> attributes; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; @@ -88,7 +91,8 @@ struct Specialization final { u32 shared_memory_size{}; // Graphics specific - std::optional<float> point_size{}; + std::optional<float> point_size; + std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; bool ndc_minus_one_to_one{}; }; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 94a89e388..e5a583dd5 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -36,6 +36,15 @@ Flags MakeInvalidationFlags() { flags[BlendConstants] = true; flags[DepthBounds] = true; flags[StencilProperties] = true; + flags[CullMode] = true; + flags[DepthBoundsEnable] = true; + flags[DepthTestEnable] = true; + flags[DepthWriteEnable] = true; + flags[DepthCompareOp] = true; + flags[FrontFace] = true; + flags[PrimitiveTopology] = true; + flags[StencilOp] = true; + flags[StencilTestEnable] = true; return flags; } @@ -75,6 +84,57 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyCullMode(Tables& tables) { + auto& table = tables[0]; + table[OFF(cull_face)] = CullMode; + table[OFF(cull_test_enabled)] = CullMode; +} + +void SetupDirtyDepthBoundsEnable(Tables& tables) { + tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable; +} + +void SetupDirtyDepthTestEnable(Tables& tables) { + tables[0][OFF(depth_test_enable)] = DepthTestEnable; +} + +void SetupDirtyDepthWriteEnable(Tables& tables) { + tables[0][OFF(depth_write_enabled)] = DepthWriteEnable; +} + +void SetupDirtyDepthCompareOp(Tables& tables) { + tables[0][OFF(depth_test_func)] = DepthCompareOp; +} + +void SetupDirtyFrontFace(Tables& tables) { + auto& table = tables[0]; + table[OFF(front_face)] = FrontFace; + table[OFF(screen_y_control)] = FrontFace; +} + +void SetupDirtyPrimitiveTopology(Tables& tables) { + tables[0][OFF(draw.topology)] = PrimitiveTopology; +} + +void SetupDirtyStencilOp(Tables& tables) { + auto& table = tables[0]; + table[OFF(stencil_front_op_fail)] = StencilOp; + table[OFF(stencil_front_op_zfail)] = StencilOp; + table[OFF(stencil_front_op_zpass)] = StencilOp; + table[OFF(stencil_front_func_func)] = StencilOp; + table[OFF(stencil_back_op_fail)] = StencilOp; + table[OFF(stencil_back_op_zfail)] = StencilOp; + table[OFF(stencil_back_op_zpass)] = StencilOp; + table[OFF(stencil_back_func_func)] = StencilOp; + + // Table 0 is used by StencilProperties + tables[1][OFF(stencil_two_side_enable)] = StencilOp; +} + +void SetupDirtyStencilTestEnable(Tables& tables) { + tables[0][OFF(stencil_enable)] = StencilTestEnable; +} + } // Anonymous namespace StateTracker::StateTracker(Core::System& system) @@ -90,6 +150,14 @@ void StateTracker::Initialize() { SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyCullMode(tables); + SetupDirtyDepthBoundsEnable(tables); + SetupDirtyDepthTestEnable(tables); + SetupDirtyDepthWriteEnable(tables); + SetupDirtyDepthCompareOp(tables); + SetupDirtyFrontFace(tables); + SetupDirtyPrimitiveTopology(tables); + SetupDirtyStencilOp(tables); } void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 03bc415b2..54ca0d6c6 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -26,6 +26,16 @@ enum : u8 { DepthBounds, StencilProperties, + CullMode, + DepthBoundsEnable, + DepthTestEnable, + DepthWriteEnable, + DepthCompareOp, + FrontFace, + PrimitiveTopology, + StencilOp, + StencilTestEnable, + Last }; static_assert(Last <= std::numeric_limits<u8>::max()); @@ -64,6 +74,46 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchCullMode() { + return Exchange(Dirty::CullMode, false); + } + + bool TouchDepthBoundsTestEnable() { + return Exchange(Dirty::DepthBoundsEnable, false); + } + + bool TouchDepthTestEnable() { + return Exchange(Dirty::DepthTestEnable, false); + } + + bool TouchDepthBoundsEnable() { + return Exchange(Dirty::DepthBoundsEnable, false); + } + + bool TouchDepthWriteEnable() { + return Exchange(Dirty::DepthWriteEnable, false); + } + + bool TouchDepthCompareOp() { + return Exchange(Dirty::DepthCompareOp, false); + } + + bool TouchFrontFace() { + return Exchange(Dirty::FrontFace, false); + } + + bool TouchPrimitiveTopology() { + return Exchange(Dirty::PrimitiveTopology, false); + } + + bool TouchStencilOp() { + return Exchange(Dirty::StencilOp, false); + } + + bool TouchStencilTestEnable() { + return Exchange(Dirty::StencilTestEnable, false); + } + private: bool Exchange(std::size_t id, bool new_value) const noexcept { auto& flags = system.GPU().Maxwell3D().dirty.flags; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 868447af2..2d28a6c47 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -121,7 +121,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; - const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024; + const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; VkBufferCreateInfo buffer_ci; buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index dfddf7ad6..689f0d276 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,10 +35,14 @@ public: /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); - VkBuffer GetHandle() const { + VkBuffer Handle() const noexcept { return *buffer; } + u64 Address() const noexcept { + return 0; + } + private: struct Watch final { VKFenceWatch fence; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 2f1d5021d..430031665 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, ci.pNext = nullptr; ci.flags = 0; ci.size = static_cast<VkDeviceSize>(host_memory_size); - ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT; + ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ci.queueFamilyIndexCount = 0; ci.pQueueFamilyIndices = nullptr; @@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP ci.extent = {params.width, params.height, 1}; break; case SurfaceTarget::Texture3D: + ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; ci.extent = {params.width, params.height, params.depth}; break; case SurfaceTarget::TextureBuffer: @@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP return ci; } +u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { + return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | + (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); +} + } // Anonymous namespace CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, @@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, } // TODO(Rodrigo): Move this to a virtual function. - main_view = CreateViewInner( - ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), - true); + u32 num_layers = 1; + if (params.is_layered || params.target == SurfaceTarget::Texture3D) { + num_layers = params.depth; + } + main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); } CachedSurface::~CachedSurface() = default; @@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() { } View CachedSurface::CreateView(const ViewParams& params) { - return CreateViewInner(params, false); -} - -View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) { // TODO(Rodrigo): Add name decorations - return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); + return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params); } void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { @@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { } CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params, bool is_proxy) + const ViewParams& params) : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, - base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, - num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) - : VK_IMAGE_VIEW_TYPE_1D} {} + base_level{params.base_level}, num_levels{params.num_levels}, + image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { + if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { + base_layer = 0; + num_layers = 1; + base_slice = params.base_layer; + num_slices = params.num_layers; + } else { + base_layer = params.base_layer; + num_layers = params.num_layers; + } +} CachedSurfaceView::~CachedSurfaceView() = default; -VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { +VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, + SwizzleSource z_source, SwizzleSource w_source) { const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); if (last_image_view && last_swizzle == new_swizzle) { return last_image_view; @@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y }); } + if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { + ASSERT(base_slice == 0); + ASSERT(num_slices == params.depth); + } + VkImageViewCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; ci.pNext = nullptr; @@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y return last_image_view = *image_view; } +VkImageView CachedSurfaceView::GetAttachment() { + if (render_target) { + return *render_target; + } + + VkImageViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.image = surface.GetImageHandle(); + ci.format = surface.GetImage().GetFormat(); + ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + ci.subresourceRange.aspectMask = aspect_mask; + ci.subresourceRange.baseMipLevel = base_level; + ci.subresourceRange.levelCount = num_levels; + if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { + ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + ci.subresourceRange.baseArrayLayer = base_slice; + ci.subresourceRange.layerCount = num_slices; + } else { + ci.viewType = image_view_type; + ci.subresourceRange.baseArrayLayer = base_layer; + ci.subresourceRange.layerCount = num_layers; + } + render_target = device.GetLogical().CreateImageView(ci); + return *render_target; +} + VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f211ccb1e..807e26c8a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -91,7 +91,6 @@ protected: void DecorateSurfaceName(); View CreateView(const ViewParams& params) override; - View CreateViewInner(const ViewParams& params, bool is_proxy); private: void UploadBuffer(const std::vector<u8>& staging_buffer); @@ -120,23 +119,20 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params, bool is_proxy); + const ViewParams& params); ~CachedSurfaceView(); - VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + VkImageView GetAttachment(); bool IsSameSurface(const CachedSurfaceView& rhs) const { return &surface == &rhs.surface; } - VkImageView GetHandle() { - return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, - Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); - } - u32 GetWidth() const { return params.GetMipWidth(base_level); } @@ -180,14 +176,6 @@ public: } private: - static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source) { - return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | - (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); - } - // Store a copy of these values to avoid double dereference when reading them const SurfaceParams params; const VkImage image; @@ -196,15 +184,18 @@ private: const VKDevice& device; CachedSurface& surface; - const u32 base_layer; - const u32 num_layers; const u32 base_level; const u32 num_levels; const VkImageViewType image_view_type; + u32 base_layer = 0; + u32 num_layers = 0; + u32 base_slice = 0; + u32 num_slices = 0; VkImageView last_image_view = nullptr; u32 last_swizzle = 0; + vk::ImageView render_target; std::unordered_map<u32, vk::ImageView> view_cache; }; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 681ecde98..351c048d2 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() { } void VKUpdateDescriptorQueue::Acquire() { - entries.clear(); -} + // Minimum number of entries required. + // This is the maximum number of entries a single draw call migth use. + static constexpr std::size_t MIN_ENTRIES = 0x400; -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, - VkDescriptorSet set) { - if (payload.size() + entries.size() >= payload.max_size()) { + if (payload.size() + MIN_ENTRIES >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); payload.clear(); } + upload_start = &*payload.end(); +} - // TODO(Rodrigo): Rework to write the payload directly - const auto payload_start = payload.data() + payload.size(); - for (const auto& entry : entries) { - if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { - payload.push_back(*image); - } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { - payload.push_back(*buffer); - } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { - payload.push_back(*texel); - } else { - UNREACHABLE(); - } - } - - scheduler.Record( - [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, payload_start); - }); +void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, + VkDescriptorSet set) { + const void* const data = upload_start; + const vk::Device* const logical = &device.GetLogical(); + scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { + logical->UpdateDescriptorSet(set, update_template, data); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index cc7e3dff4..945320c72 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -15,17 +15,13 @@ namespace Vulkan { class VKDevice; class VKScheduler; -class DescriptorUpdateEntry { -public: - explicit DescriptorUpdateEntry() {} - - DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} +struct DescriptorUpdateEntry { + DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} - DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} + DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} - DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} + DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} -private: union { VkDescriptorImageInfo image; VkDescriptorBufferInfo buffer; @@ -45,32 +41,34 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); void AddSampledImage(VkSampler sampler, VkImageView image_view) { - entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); } void AddImage(VkImageView image_view) { - entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); } void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { - entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); + payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); } void AddTexelBuffer(VkBufferView texel_buffer) { - entries.emplace_back(texel_buffer); + payload.emplace_back(texel_buffer); } - VkImageLayout* GetLastImageLayout() { - return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; + VkImageLayout* LastImageLayout() { + return &payload.back().image.imageLayout; } -private: - using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; + const VkImageLayout* LastImageLayout() const { + return &payload.back().image.imageLayout; + } +private: const VKDevice& device; VKScheduler& scheduler; - boost::container::static_vector<Variant, 0x400> entries; + const DescriptorUpdateEntry* upload_start = nullptr; boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; }; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 2ce9b0626..051298cc8 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -88,6 +88,16 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetStencilWriteMask); X(vkCmdSetViewport); X(vkCmdWaitEvents); + X(vkCmdBindVertexBuffers2EXT); + X(vkCmdSetCullModeEXT); + X(vkCmdSetDepthBoundsTestEnableEXT); + X(vkCmdSetDepthCompareOpEXT); + X(vkCmdSetDepthTestEnableEXT); + X(vkCmdSetDepthWriteEnableEXT); + X(vkCmdSetFrontFaceEXT); + X(vkCmdSetPrimitiveTopologyEXT); + X(vkCmdSetStencilOpEXT); + X(vkCmdSetStencilTestEnableEXT); X(vkCreateBuffer); X(vkCreateBufferView); X(vkCreateCommandPool); @@ -153,7 +163,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { bool Load(InstanceDispatch& dld) noexcept { #define X(name) Proc(dld.name, dld, #name) - return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); + return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && + X(vkEnumerateInstanceLayerProperties); #undef X } @@ -725,8 +736,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s return supported == VK_TRUE; } -VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const - noexcept { +VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const { VkSurfaceCapabilitiesKHR capabilities; Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); return capabilities; @@ -771,4 +781,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp return properties; } +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkLayerProperties> properties(num); + if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + } // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 98937a77a..71daac9d7 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -141,6 +141,7 @@ struct InstanceDispatch { PFN_vkCreateInstance vkCreateInstance; PFN_vkDestroyInstance vkDestroyInstance; PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; PFN_vkCreateDevice vkCreateDevice; @@ -206,6 +207,16 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; PFN_vkCmdSetViewport vkCmdSetViewport; PFN_vkCmdWaitEvents vkCmdWaitEvents; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; + PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; + PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; + PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; + PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; + PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; + PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; + PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; + PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateBufferView vkCreateBufferView; PFN_vkCreateCommandPool vkCreateCommandPool; @@ -779,7 +790,7 @@ public: bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; - VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; + VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const; std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; @@ -968,6 +979,50 @@ public: buffer_barriers.data(), image_barriers.size(), image_barriers.data()); } + void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + const VkDeviceSize* strides) const noexcept { + dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets, + sizes, strides); + } + + void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept { + dld->vkCmdSetCullModeEXT(handle, cull_mode); + } + + void SetDepthBoundsTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept { + dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); + } + + void SetDepthTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetDepthWriteEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { + dld->vkCmdSetFrontFaceEXT(handle, front_face); + } + + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { + dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); + } + + void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op, + VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept { + dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); + } + + void SetStencilTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { @@ -996,4 +1051,7 @@ private: std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( const InstanceDispatch& dld); +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld); + } // namespace Vulkan::vk diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 848e46874..b2e88fa20 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -13,55 +13,101 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + PredCondition cond; + bool bf; + bool ftz; + bool neg_a; + bool abs_a; + bool neg_b; + bool abs_b; + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_C: + case OpCode::Id::HSET2_IMM: + cond = instr.hsetp2.cbuf_and_imm.cond; + bf = instr.Bit(53); + ftz = instr.Bit(54); + neg_a = instr.Bit(43); + abs_a = instr.Bit(44); + neg_b = instr.Bit(56); + abs_b = instr.Bit(54); + break; + case OpCode::Id::HSET2_R: + cond = instr.hsetp2.reg.cond; + bf = instr.Bit(49); + ftz = instr.Bit(50); + neg_a = instr.Bit(43); + abs_a = instr.Bit(44); + neg_b = instr.Bit(31); + abs_b = instr.Bit(30); + break; + default: + UNREACHABLE(); } - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); - - Node op_b = [&]() { + Node op_b = [this, instr, opcode] { switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_C: + // Inform as unimplemented as this is not tested. + UNIMPLEMENTED_MSG("HSET2_C is not implemented"); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); case OpCode::Id::HSET2_R: return GetRegister(instr.gpr20); + case OpCode::Id::HSET2_IMM: + return UnpackHalfImmediate(instr, true); default: UNREACHABLE(); - return Immediate(0); + return Node{}; } }(); - op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); - op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); - const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + if (!ftz) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } + + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); + op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); + + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_R: + op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); + [[fallthrough]]; + case OpCode::Id::HSET2_C: + op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); + break; + default: + break; + } - const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); + Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + + Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); // HSET2 operates on each half float in the pack. std::array<Node, 2> values; for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; - const Node true_value = Immediate(raw_value << (i * 16)); - const Node false_value = Immediate(0); - - const Node comparison = - Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - const Node predicate = Operation(combiner, comparison, second_pred); + const u32 raw_value = bf ? 0x3c00 : 0xffff; + Node true_value = Immediate(raw_value << (i * 16)); + Node false_value = Immediate(0); + Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); + Node predicate = Operation(combiner, comparison, second_pred); values[i] = - Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); + Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); } - const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); - SetRegister(bb, instr.gpr0, value); + Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); + SetRegister(bb, instr.gpr0, move(value)); return pc; } diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 60b6ad72a..07778dc3e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, break; case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: + case TextureFormat::BF10GF11RF11: if (component == 0) { return descriptor.b_type; } @@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, } break; } - UNIMPLEMENTED_MSG("texture format not implement={}", format); + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return ComponentType::FLOAT; } @@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 6; } return 0; + case TextureFormat::BF10GF11RF11: + if (component == 1 || component == 2) { + return 11; + } + if (component == 0) { + return 10; + } + return 0; case TextureFormat::G8R24: if (component == 0) { return 8; @@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("texture format not implement={}", format); - return 0; } + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return 0; } std::size_t GetImageComponentMask(TextureFormat format) { @@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R32_B24G8: case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: + case TextureFormat::BF10GF11RF11: return std::size_t{R | G | B}; case TextureFormat::R32_G32: case TextureFormat::R16_G16: @@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R8: case TextureFormat::R1: return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("texture format not implement={}", format); - return std::size_t{R | G | B | A}; } + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return std::size_t{R | G | B | A}; } std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { @@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, return {std::move(original_value), true}; } default: - UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); return {std::move(original_value), true}; } } @@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { default: break; } - UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", + UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", static_cast<u64>(instr.suatom_d.operation.Value()), static_cast<u64>(instr.suatom_d.operation_type.Value())); return OperationCode::AtomicImageAdd; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d00e10913..c0a8f233f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0U); + return Immediate(0x00ff'0000U); case SystemVariable::WscaleFactorXY: UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); return Immediate(0U); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 8f0bb996e..29ebf65ba 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, - std::optional<u32> buffer) { +ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( + SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { if (info.IsComplete()) { return info; } - const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) - : registry.ObtainBoundSampler(offset); if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); @@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo sampler_info) { - const auto offset = static_cast<u32>(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, offset); + const u32 offset = static_cast<u32>(sampler.index.Value()); + const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), @@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_node != nullptr); - if (base_node == nullptr) { + if (!base_node) { + UNREACHABLE(); return std::nullopt; } - if (const auto bindless_sampler_info = - std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { - const u32 buffer = bindless_sampler_info->GetIndex(); - const u32 offset = bindless_sampler_info->GetOffset(); - info = GetSamplerInfo(info, offset, buffer); + if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { + const u32 buffer = sampler_info->index; + const u32 offset = sampler_info->offset; + info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { + [buffer, offset](const Sampler& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != used_samplers.end()) { @@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, *info.is_shadow, *info.is_buffer, false); } - if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { - const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; - index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); - info = GetSamplerInfo(info, base_offset); + if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { + const std::pair indices = sampler_info->indices; + const std::pair offsets = sampler_info->offsets; + info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); + + // Try to use an already created sampler if it exists + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { + return offsets == std::pair{entry.offset, entry.secondary_offset} && + indices == std::pair{entry.buffer, entry.secondary_buffer}; + }); + if (it != used_samplers.end()) { + ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && + it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); + return *it; + } + + // Otherwise create a new mapping for this sampler + const u32 next_index = static_cast<u32>(used_samplers.size()); + return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, + *info.is_shadow, *info.is_buffer); + } + if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { + const u32 base_offset = sampler_info->base_offset / 4; + index_var = GetCustomVariable(sampler_info->bindless_var); + info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if( diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp index 074f21691..5071c83ca 100644 --- a/src/video_core/shader/memory_util.cpp +++ b/src/video_core/shader/memory_util.cpp @@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, const ProgramCode& code_b) { - u64 unique_identifier = boost::hash_value(code); + size_t unique_identifier = boost::hash_value(code); if (is_a) { // VertexA programs include two programs boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } - return unique_identifier; + return static_cast<u64>(unique_identifier); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c5e5165ff..8f230d57a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; -class BindlessSamplerNode; -class ArraySamplerNode; +struct ArraySamplerNode; +struct BindlessSamplerNode; +struct SeparateSamplerNode; -using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; using TrackSampler = std::shared_ptr<TrackSamplerData>; struct Sampler { @@ -288,63 +289,51 @@ struct Sampler { : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_buffer{is_buffer}, is_indexed{is_indexed} {} + /// Separate sampler constructor + constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow, + bool is_buffer) + : index{index}, offset{offsets.first}, secondary_offset{offsets.second}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array}, + is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {} + /// Bindless samplers constructor constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). - u32 size = 1; ///< Size of the sampler. + u32 index = 0; ///< Emulated index given for the this sampler. + u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. + u32 secondary_offset = 0; ///< Secondary offset in the const buffer. + u32 buffer = 0; ///< Buffer where the bindless sampler is read. + u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. + u32 size = 1; ///< Size of the sampler. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. + bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. + bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. + bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. + bool is_separated = false; ///< Whether the image and sampler is separated or not. }; /// Represents a tracked bindless sampler into a direct const buffer -class ArraySamplerNode final { -public: - explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) - : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} - - constexpr u32 GetIndex() const { - return index; - } - - constexpr u32 GetBaseOffset() const { - return base_offset; - } - - constexpr u32 GetIndexVar() const { - return bindless_var; - } - -private: +struct ArraySamplerNode { u32 index; u32 base_offset; u32 bindless_var; }; -/// Represents a tracked bindless sampler into a direct const buffer -class BindlessSamplerNode final { -public: - explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} - - constexpr u32 GetIndex() const { - return index; - } - - constexpr u32 GetOffset() const { - return offset; - } +/// Represents a tracked separate sampler image pair that was folded statically +struct SeparateSamplerNode { + std::pair<u32, u32> indices; + std::pair<u32, u32> offsets; +}; -private: +/// Represents a tracked bindless sampler into a direct const buffer +struct BindlessSamplerNode { u32 index; u32 offset; }; diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 11231bbea..1e0886185 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) { template <typename T, typename... Args> TrackSampler MakeTrackSampler(Args&&... args) { static_assert(std::is_convertible_v<T, TrackSamplerData>); - return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); + return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); } template <typename... Args> diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index af70b3f35..cdf274e54 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { return value; } +std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( + std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { + SeparateSamplerKey key; + key.buffers = buffers; + key.offsets = offsets; + const auto iter = separate_samplers.find(key); + if (iter != separate_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + + const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); + const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); + const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); + separate_samplers.emplace(key, value); + return value; +} + std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 0c80d35fd..231206765 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -19,8 +19,39 @@ namespace VideoCommon::Shader { +struct SeparateSamplerKey { + std::pair<u32, u32> buffers; + std::pair<u32, u32> offsets; +}; + +} // namespace VideoCommon::Shader + +namespace std { + +template <> +struct hash<VideoCommon::Shader::SeparateSamplerKey> { + std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { + return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ + key.offsets.second); + } +}; + +template <> +struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { + bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, + const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { + return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; + } +}; + +} // namespace std + +namespace VideoCommon::Shader { + using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; +using SeparateSamplerMap = + std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; using BindlessSamplerMap = std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; @@ -73,6 +104,9 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); + std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( + std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); /// Inserts a key. @@ -128,6 +162,7 @@ private: Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; + SeparateSamplerMap separate_samplers; BindlessSamplerMap bindless_samplers; u32 bound_buffer; GraphicsInfo graphics_info; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 15ae152f2..3a98b2104 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -330,8 +330,8 @@ private: OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, - std::optional<u32> buffer = std::nullopt); + SamplerInfo GetSamplerInfo(SamplerInfo info, + std::optional<Tegra::Engines::SamplerDescriptor> sampler); /// Accesses a texture sampler. std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); @@ -409,8 +409,14 @@ private: std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); + std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); + + std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, + const OperationNode& operation, + Node gpr, Node base_offset, + Node tracked, const NodeBlock& code, + s64 cursor); std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index eb97bfd41..d5ed81442 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -14,6 +14,7 @@ namespace VideoCommon::Shader { namespace { + std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { @@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { operation->SetAmendIndex(amend_index); return true; - } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + } + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { conditional->SetAmendIndex(amend_index); return true; } @@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { } // Anonymous namespace -std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { +std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { + const u32 cbuf_index = cbuf->GetIndex(); + // Constant buffer found, test if it's an immediate const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - auto track = - MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); + auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); return {tracked, track}; } if (const auto operation = std::get_if<OperationNode>(&*offset)) { const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf->GetIndex()) { + if (bound_buffer != cbuf_index) { return {}; } - const auto pair = DecoupleIndirectRead(*operation); - if (!pair) { - return {}; + if (const std::optional pair = DecoupleIndirectRead(*operation)) { + auto [gpr, base_offset] = *pair; + return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, + code, cursor); } - auto [gpr, base_offset] = *pair; - const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - Node op = - Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); - - const Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - // TODO Implement Bindless Index custom variable - auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), - offset_inm->GetValue(), bindless_cv); - return {tracked, track}; } return {}; } @@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons return TrackBindlessSampler(source, code, new_cursor); } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); - std::get<0>(found)) { - // Cbuf found in operand. + const OperationNode& op = *operation; + + const OperationCode opcode = operation->GetCode(); + if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { + ASSERT(op.GetOperandsCount() == 2); + auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); + auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); + if (node_a && node_b) { + auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, + std::pair{offset_a, offset_b}); + return {tracked, std::move(track)}; + } + } + std::size_t i = op.GetOperandsCount(); + while (i--) { + if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { + // Constant buffer found in operand. return found; } } @@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons return {}; } +std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( + const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, + const NodeBlock& code, s64 cursor) { + const auto offset_imm = std::get<ImmediateNode>(*base_offset); + const auto& gpu_driver = registry.AccessGuestDriverProfile(); + const u32 bindless_cv = NewCustomVariable(); + const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); + Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); + + Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); + const std::size_t amend_index = DeclareAmend(std::move(amend_op)); + AmendNodeCv(amend_index, code[cursor]); + + // TODO: Implement bindless index custom variable + auto track = + MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); + return {tracked, track}; +} + std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h new file mode 100644 index 000000000..b7608fc7b --- /dev/null +++ b/src/video_core/shader_cache.h @@ -0,0 +1,240 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <memory> +#include <mutex> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template <class T> +class ShaderCache { + static constexpr u64 PAGE_BITS = 14; + static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + + struct Entry { + VAddr addr_start; + VAddr addr_end; + T* data; + + bool is_memory_marked = true; + + constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + return start < addr_end && addr_start < end; + } + }; + +public: + virtual ~ShaderCache() = default; + + /// @brief Removes shaders inside a given region + /// @note Checks for ranges + /// @param addr Start address of the invalidation + /// @param size Number of bytes of the invalidation + void InvalidateRegion(VAddr addr, std::size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); + } + + /// @brief Unmarks a memory region as cached and marks it for removal + /// @param addr Start address of the CPU write operation + /// @param size Number of bytes of the CPU write operation + void OnCPUWrite(VAddr addr, std::size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + } + + /// @brief Flushes delayed removal operations + void SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); + } + + /// @brief Tries to obtain a cached shader starting in a given address + /// @note Doesn't check for ranges, the given address has to be the start of the shader + /// @param addr Start address of the shader, this doesn't cache for region + /// @return Pointer to a valid shader, nullptr when nothing is found + T* TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; + } + +protected: + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + + /// @brief Register in the cache a given entry + /// @param data Shader to store in the cache + /// @param addr Start address of the shader that will be registered + /// @param size Size in bytes of the shader + void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); + } + + /// @brief Called when a shader is going to be removed + /// @param shader Shader that will be removed + /// @pre invalidation_cache is locked + /// @pre lookup_mutex is locked + virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} + +private: + /// @brief Invalidate pages in a given region + /// @pre invalidation_mutex is locked + void InvalidatePagesInRegion(VAddr addr, std::size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } + } + + /// @brief Remove shaders marked for deletion + /// @pre invalidation_mutex is locked + void RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::sort(marked_for_removal.begin(), marked_for_removal.end()); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector<T*> removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } + } + + /// @brief Invalidates entries in a given range for the passed page + /// @param entries Vector of entries in the page, it will be modified on overlaps + /// @param addr Start address of the invalidation + /// @param addr_end Non-inclusive end address of the invalidation + /// @pre invalidation_mutex is locked + void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { + std::size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } + } + + /// @brief Removes all references to an entry in the invalidation cache + /// @param entry Entry to remove from the invalidation cache + /// @pre invalidation_mutex is locked + void RemoveEntryFromInvalidationCache(const Entry* entry) { + const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector<Entry*>& entries = entries_it->second; + + const auto entry_it = std::find(entries.begin(), entries.end(), entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } + } + + /// @brief Unmarks an entry from the rasterizer cache + /// @param entry Entry to unmark from memory + void UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const std::size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); + } + + /// @brief Removes a vector of shaders from a list + /// @param removed_shaders Shaders to be removed from the storage + /// @pre invalidation_mutex is locked + /// @pre lookup_mutex is locked + void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { + // Notify removals + for (T* const shader : removed_shaders) { + OnShaderRemoval(shader); + } + + // Remove them from the cache + const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) { + return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != + removed_shaders.end(); + }; + storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end()); + } + + /// @brief Creates a new entry in the lookup cache and returns its pointer + /// @pre lookup_mutex is locked + Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { + auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; + } + + VideoCore::RasterizerInterface& rasterizer; + + mutable std::mutex lookup_mutex; + std::mutex invalidation_mutex; + + std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; + std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; + std::vector<std::unique_ptr<T>> storage; + std::vector<Entry*> marked_for_removal; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 77> DefinitionTable = {{ +constexpr std::array<Table, 78> DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{ {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, + {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 715f39d0d..0caf3b4f0 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( } const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; const auto layer{static_cast<u32>(relative_address / layer_size)}; + if (layer >= params.depth) { + return {}; + } const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); @@ -248,12 +251,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, // Use an extra temporal buffer auto& tmp_buffer = staging_cache.GetBuffer(1); - // Special case for 3D Texture Segments - const bool must_read_current_data = - params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; tmp_buffer.resize(guest_memory_size); host_ptr = tmp_buffer.data(); - if (must_read_current_data) { + + if (params.target == SurfaceTarget::Texture3D) { + // Special case for 3D texture segments memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 79e10ffbb..173f2edba 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -217,8 +217,8 @@ public: } bool IsProtected() const { - // Only 3D Slices are to be protected - return is_target && params.block_depth > 0; + // Only 3D slices are to be protected + return is_target && params.target == SurfaceTarget::Texture3D; } bool IsRenderTarget() const { @@ -250,6 +250,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } + TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { + return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, + base_level, num_levels)); + } + std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size, const u32 mipmap, @@ -272,8 +277,8 @@ public: std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size) { if (params.target == SurfaceTarget::Texture3D || - (params.num_levels == 1 && !params.is_layered) || - view_params.target == SurfaceTarget::Texture3D) { + view_params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered)) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 884fabffe..0b2b2b8c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.num_levels = 1; params.emulated_levels = 1; - const bool is_layered = config.layers > 1 && params.block_depth == 0; - params.is_layered = is_layered; - params.depth = is_layered ? config.layers.Value() : 1; - params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + if (config.memory_layout.is_3d != 0) { + params.depth = config.layers.Value(); + params.is_layered = false; + params.target = SurfaceTarget::Texture3D; + } else if (config.layers > 1) { + params.depth = config.layers.Value(); + params.is_layered = true; + params.target = SurfaceTarget::Texture2DArray; + } else { + params.depth = 1; + params.is_layered = false; + params.target = SurfaceTarget::Texture2D; + } return params; } @@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters + // TODO(Rodrigo): Try to guess texture arrays from parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..cdcddb225 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include <unordered_map> #include <vector> +#include <boost/container/small_vector.hpp> #include <boost/icl/interval_map.hpp> #include <boost/range/iterator_range.hpp> @@ -23,6 +24,7 @@ #include "core/core.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/compatible_formats.h" #include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -46,13 +48,14 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::FormatCompatibility; using VideoCore::Surface::PixelFormat; - using VideoCore::Surface::SurfaceTarget; using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template <typename TSurface, typename TView> class TextureCache { + using VectorSurface = boost::container::small_vector<TSurface, 1>; public: void InvalidateRegion(VAddr addr, std::size_t size) { @@ -246,7 +249,7 @@ public: auto& surface = render_targets[index].target; surface->MarkAsRenderTarget(false, NO_RT); const auto& cr_params = surface->GetSurfaceParams(); - if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) { + if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { AsyncFlushSurface(surface); } } @@ -296,30 +299,30 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const std::optional<VAddr> dst_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); - const std::optional<VAddr> src_cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); - std::pair<TSurface, TView> dst_surface = - GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - std::pair<TSurface, TView> src_surface = - GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); - ImageBlit(src_surface.second, dst_surface.second, copy_config); + + const auto& memory_manager = system.GPU().MemoryManager(); + const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); + const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); + std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); + TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; + ImageBlit(src_surface, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(VAddr addr) { + TSurface TryFindFramebufferSurface(VAddr addr) const { if (!addr) { return nullptr; } const VAddr page = addr >> registry_page_bits; - std::vector<TSurface>& list = registry[page]; - for (auto& surface : list) { - if (surface->GetCpuAddr() == addr) { - return surface; - } + const auto it = registry.find(page); + if (it == registry.end()) { + return nullptr; } - return nullptr; + const auto& list = it->second; + const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { + return surface->GetCpuAddr() == addr; + }); + return found != list.end() ? *found : nullptr; } u64 Tick() { @@ -498,18 +501,18 @@ private: * @param untopological Indicates to the recycler that the texture has no way * to match the overlaps due to topological reasons. **/ - RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, + RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::IsGPULevelExtreme()) { return RecycleStrategy::Flush; } // 3D Textures decision - if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); - if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + if (s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } } @@ -538,9 +541,8 @@ private: * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ - std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, - const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, + std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { @@ -594,7 +596,7 @@ private: } else { new_surface = GetUncachedSurface(gpu_addr, params); } - const auto& final_params = new_surface->GetSurfaceParams(); + const SurfaceParams& final_params = new_surface->GetSurfaceParams(); if (cr_params.type != final_params.type) { if (Settings::IsGPULevelExtreme()) { BufferCopy(current_surface, new_surface); @@ -602,7 +604,7 @@ private: } else { std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + TryCopyImage(current_surface, new_surface, brick); } } Unregister(current_surface); @@ -650,47 +652,65 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. **/ - std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, + std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr) { + GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { - return {}; + return std::nullopt; } - bool modified = false; + const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; TSurface new_surface = GetUncachedSurface(gpu_addr, params); - u32 passed_tests = 0; + + if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { + LoadSurface(new_surface); + for (const auto& surface : overlaps) { + Unregister(surface); + } + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; + } + + std::size_t passed_tests = 0; for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.is_layered || src_params.num_levels > 1) { - // We send this cases to recycle as they are more complex to handle - return {}; - } - const std::size_t candidate_size = surface->GetSizeInBytes(); - auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; + const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { continue; } - const auto [layer, mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + const auto [base_layer, base_mipmap] = *mipmap_layer; + if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { continue; } - modified |= surface->IsModified(); - // Now we got all the data set up - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); - passed_tests++; - ImageCopy(surface, new_surface, copy_params); + ++passed_tests; + + // Copy all mipmaps and layers + const u32 block_width = params.GetDefaultBlockWidth(); + const u32 block_height = params.GetDefaultBlockHeight(); + for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + if (width < block_width || height < block_height) { + // Current APIs forbid copying small compressed textures, avoid errors + break; + } + const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, + src_params.depth); + TryCopyImage(surface, new_surface, copy_params); + } } if (passed_tests == 0) { - return {}; + return std::nullopt; + } + if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { // In Accurate GPU all tests should pass, else we recycle - } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - return {}; + return std::nullopt; } + + const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); for (const auto& surface : overlaps) { Unregister(surface); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -708,53 +728,11 @@ private: * @param preserve_contents Indicates that the new surface should be loaded from memory or * left blank. */ - std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, + std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, - const VAddr cpu_addr, + GPUVAddr gpu_addr, VAddr cpu_addr, bool preserve_contents) { - if (params.target == SurfaceTarget::Texture3D) { - bool failed = false; - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D) { - failed = true; - break; - } - if (src_params.height != params.height) { - failed = true; - break; - } - if (src_params.block_depth != params.block_depth || - src_params.block_height != params.block_height) { - failed = true; - break; - } - const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); - const auto offsets = params.GetBlockOffsetXYZ(offset); - const auto z = std::get<2>(offsets); - modified |= surface->IsModified(); - const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, - 1); - ImageCopy(surface, new_surface, copy_params); - } - if (failed) { - return std::nullopt; - } - for (const auto& surface : overlaps) { - Unregister(surface); - } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - auto view = new_surface->GetMainView(); - return {{std::move(new_surface), view}}; - } else { + if (params.target != SurfaceTarget::Texture3D) { for (const auto& surface : overlaps) { if (!surface->MatchTarget(params.target)) { if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { @@ -770,11 +748,60 @@ private: continue; } if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return {{surface, surface->GetMainView()}}; + return std::make_pair(surface, surface->GetMainView()); } } return InitializeSurface(gpu_addr, params, preserve_contents); } + + if (params.num_levels > 1) { + // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach + return std::nullopt; + } + + if (overlaps.size() == 1) { + const auto& surface = overlaps[0]; + const SurfaceParams& overlap_params = surface->GetSurfaceParams(); + // Don't attempt to render to textures with more than one level for now + // The texture has to be to the right or the sample address if we want to render to it + if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { + const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + if (slice < overlap_params.depth) { + auto view = surface->Emplace3DView(slice, params.depth, 0, 1); + return std::make_pair(std::move(surface), std::move(view)); + } + } + } + + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + bool modified = false; + + for (auto& surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.target != SurfaceTarget::Texture2D || + src_params.height != params.height || + src_params.block_depth != params.block_depth || + src_params.block_height != params.block_height) { + return std::nullopt; + } + modified |= surface->IsModified(); + + const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); + const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); + const u32 width = params.width; + const u32 height = params.height; + const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); + TryCopyImage(surface, new_surface, copy_params); + } + for (const auto& surface : overlaps) { + Unregister(surface); + } + new_surface->MarkAsModified(modified, Tick()); + Register(new_surface); + + TView view = new_surface->GetMainView(); + return std::make_pair(std::move(new_surface), std::move(view)); } /** @@ -810,7 +837,7 @@ private: TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - std::vector<TSurface> overlaps{current_surface}; + VectorSurface overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } @@ -852,7 +879,7 @@ private: } } - // Check if it's a 3D texture + // Manage 3D textures if (params.block_depth > 0) { auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); @@ -868,12 +895,9 @@ private: // two things either the candidate surface is a supertexture of the overlap // or they don't match in any known way. if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - if (current_surface->GetGpuAddr() == gpu_addr) { - std::optional<std::pair<TSurface, TView>> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } + const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); + if (view) { + return *view; } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); @@ -1030,7 +1054,7 @@ private: void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); + auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); if (deduced_src.Failed() || deduced_dst.Failed()) { return; } @@ -1126,23 +1150,25 @@ private: } } - std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { + VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } const VAddr cpu_addr_end = cpu_addr + size; - VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - std::vector<TSurface> surfaces; - while (start <= end) { - std::vector<TSurface>& list = registry[start]; - for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { - surface->MarkAsPicked(true); - surfaces.push_back(surface); + VectorSurface surfaces; + for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { + const auto it = registry.find(start); + if (it == registry.end()) { + continue; + } + for (auto& surface : it->second) { + if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { + continue; } + surface->MarkAsPicked(true); + surfaces.push_back(surface); } - start++; } for (auto& surface : surfaces) { surface->MarkAsPicked(false); @@ -1167,6 +1193,19 @@ private: return {}; } + /// Try to do an image copy logging when formats are incompatible. + void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { + const SurfaceParams& src_params = src->GetSurfaceParams(); + const SurfaceParams& dst_params = dst->GetSurfaceParams(); + if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { + LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", + static_cast<int>(dst_params.pixel_format), + static_cast<int>(src_params.pixel_format)); + return; + } + ImageCopy(src, dst, copy); + } + constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { return siblings_table[static_cast<std::size_t>(format)]; } @@ -1216,6 +1255,7 @@ private: VideoCore::RasterizerInterface& rasterizer; FormatLookupTable format_lookup_table; + FormatCompatibility format_compatibility; u64 ticks{}; diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index d1939d744..4171e3ef2 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -48,7 +48,7 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { }; unsigned SettingsMinimumAnisotropy() noexcept { - switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { + switch (static_cast<Anisotropy>(Settings::values.max_anisotropy.GetValue())) { default: case Anisotropy::Default: return 1U; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index f60bdc60a..45f360bdd 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -19,7 +19,7 @@ namespace { std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, Core::System& system, Core::Frontend::GraphicsContext& context) { - switch (Settings::values.renderer_backend) { + switch (Settings::values.renderer_backend.GetValue()) { case Settings::RendererBackend::OpenGL: return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); #ifdef HAS_VULKAN @@ -42,7 +42,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor return nullptr; } - if (Settings::values.use_asynchronous_gpu_emulation) { + if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), std::move(context)); } @@ -51,8 +51,8 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor u16 GetResolutionScaleFactor(const RendererBase& renderer) { return static_cast<u16>( - Settings::values.resolution_factor != 0 - ? Settings::values.resolution_factor + Settings::values.resolution_factor.GetValue() != 0 + ? Settings::values.resolution_factor.GetValue() : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); } diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 8b9404718..6b25a7fa0 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -24,6 +24,8 @@ add_executable(yuzu compatibility_list.h configuration/config.cpp configuration/config.h + configuration/configuration_shared.cpp + configuration/configuration_shared.h configuration/configure.ui configuration/configure_audio.cpp configuration/configure_audio.h @@ -60,9 +62,12 @@ add_executable(yuzu configuration/configure_mouse_advanced.cpp configuration/configure_mouse_advanced.h configuration/configure_mouse_advanced.ui - configuration/configure_per_general.cpp - configuration/configure_per_general.h - configuration/configure_per_general.ui + configuration/configure_per_game.cpp + configuration/configure_per_game.h + configuration/configure_per_game.ui + configuration/configure_per_game_addons.cpp + configuration/configure_per_game_addons.h + configuration/configure_per_game_addons.ui configuration/configure_profile_manager.cpp configuration/configure_profile_manager.h configuration/configure_profile_manager.ui @@ -147,7 +152,7 @@ endif() create_target_directory_groups(yuzu) target_link_libraries(yuzu PRIVATE common core input_common video_core) -target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) +target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::Widgets) target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) if (ENABLE_VULKAN AND NOT WIN32) @@ -208,6 +213,10 @@ if (MSVC) copy_yuzu_unicorn_deps(yuzu) endif() +if (NOT APPLE) + target_compile_definitions(yuzu PRIVATE HAS_OPENGL) +endif() + if (ENABLE_VULKAN) target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) target_compile_definitions(yuzu PRIVATE HAS_VULKAN) diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 1adf8932b..5738787ac 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -8,13 +8,16 @@ #include <QHBoxLayout> #include <QKeyEvent> #include <QMessageBox> -#include <QOffscreenSurface> -#include <QOpenGLContext> #include <QPainter> #include <QScreen> #include <QStringList> #include <QWindow> +#ifdef HAS_OPENGL +#include <QOffscreenSurface> +#include <QOpenGLContext> +#endif + #if !defined(WIN32) && HAS_VULKAN #include <qpa/qplatformnativeinterface.h> #endif @@ -41,49 +44,65 @@ EmuThread::EmuThread() = default; EmuThread::~EmuThread() = default; void EmuThread::run() { - MicroProfileOnThreadCreate("EmuThread"); + std::string name = "yuzu:EmuControlThread"; + MicroProfileOnThreadCreate(name.c_str()); + Common::SetCurrentThreadName(name.c_str()); + + auto& system = Core::System::GetInstance(); + + system.RegisterHostThread(); + + auto& gpu = system.GPU(); // Main process has been loaded. Make the context current to this thread and begin GPU and CPU // execution. - Core::System::GetInstance().GPU().Start(); + gpu.Start(); + + gpu.ObtainContext(); emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); - Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( + system.Renderer().Rasterizer().LoadDiskResources( stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { emit LoadProgress(stage, value, total); }); emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); + gpu.ReleaseContext(); + // Holds whether the cpu was running during the last iteration, // so that the DebugModeLeft signal can be emitted before the // next execution step bool was_active = false; while (!stop_run) { if (running) { - if (!was_active) + if (was_active) { emit DebugModeLeft(); + } - Core::System::ResultStatus result = Core::System::GetInstance().RunLoop(); + running_guard = true; + Core::System::ResultStatus result = system.Run(); if (result != Core::System::ResultStatus::Success) { + running_guard = false; this->SetRunning(false); - emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails()); + emit ErrorThrown(result, system.GetStatusDetails()); } + running_wait.Wait(); + result = system.Pause(); + if (result != Core::System::ResultStatus::Success) { + running_guard = false; + this->SetRunning(false); + emit ErrorThrown(result, system.GetStatusDetails()); + } + running_guard = false; - was_active = running || exec_step; - if (!was_active && !stop_run) + if (!stop_run) { + was_active = true; emit DebugModeEntered(); + } } else if (exec_step) { - if (!was_active) - emit DebugModeLeft(); - - exec_step = false; - Core::System::GetInstance().SingleStep(); - emit DebugModeEntered(); - yieldCurrentThread(); - - was_active = false; + UNIMPLEMENTED(); } else { std::unique_lock lock{running_mutex}; running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; }); @@ -91,13 +110,14 @@ void EmuThread::run() { } // Shutdown the core emulation - Core::System::GetInstance().Shutdown(); + system.Shutdown(); #if MICROPROFILE_ENABLED MicroProfileOnThreadExit(); #endif } +#ifdef HAS_OPENGL class OpenGLSharedContext : public Core::Frontend::GraphicsContext { public: /// Create the original context that should be shared from @@ -106,6 +126,9 @@ public: format.setVersion(4, 3); format.setProfile(QSurfaceFormat::CompatibilityProfile); format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); + if (Settings::values.renderer_debug) { + format.setOption(QSurfaceFormat::FormatOption::DebugContext); + } // TODO: expose a setting for buffer value (ie default/single/double/triple) format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); format.setSwapInterval(0); @@ -122,7 +145,7 @@ public: // disable vsync for any shared contexts auto format = share_context->format(); - format.setSwapInterval(main_surface ? Settings::values.use_vsync : 0); + format.setSwapInterval(main_surface ? Settings::values.use_vsync.GetValue() : 0); context = std::make_unique<QOpenGLContext>(); context->setShareContext(share_context); @@ -180,6 +203,7 @@ private: std::unique_ptr<QOffscreenSurface> offscreen_surface{}; QSurface* surface; }; +#endif class DummyContext : public Core::Frontend::GraphicsContext {}; @@ -352,7 +376,7 @@ QByteArray GRenderWindow::saveGeometry() { } qreal GRenderWindow::windowPixelRatio() const { - return devicePixelRatio(); + return devicePixelRatioF(); } std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const { @@ -470,13 +494,15 @@ void GRenderWindow::resizeEvent(QResizeEvent* event) { } std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { - if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { +#ifdef HAS_OPENGL + if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL) { auto c = static_cast<OpenGLSharedContext*>(main_context.get()); // Bind the shared contexts to the main surface in case the backend wants to take over // presentation return std::make_unique<OpenGLSharedContext>(c->GetShareContext(), child_widget->windowHandle()); } +#endif return std::make_unique<DummyContext>(); } @@ -485,7 +511,7 @@ bool GRenderWindow::InitRenderTarget() { first_frame = false; - switch (Settings::values.renderer_backend) { + switch (Settings::values.renderer_backend.GetValue()) { case Settings::RendererBackend::OpenGL: if (!InitializeOpenGL()) { return false; @@ -512,7 +538,7 @@ bool GRenderWindow::InitRenderTarget() { OnFramebufferSizeChanged(); BackupGeometry(); - if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { + if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL) { if (!LoadOpenGL()) { return false; } @@ -557,6 +583,7 @@ void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal } bool GRenderWindow::InitializeOpenGL() { +#ifdef HAS_OPENGL // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, // WA_DontShowOnScreen, WA_DeleteOnClose auto child = new OpenGLRenderWidget(this); @@ -568,6 +595,11 @@ bool GRenderWindow::InitializeOpenGL() { std::make_unique<OpenGLSharedContext>(context->GetShareContext(), child->windowHandle())); return true; +#else + QMessageBox::warning(this, tr("OpenGL not available!"), + tr("yuzu has not been compiled with OpenGL support.")); + return false; +#endif } bool GRenderWindow::InitializeVulkan() { diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 3626604ca..6c59b4d5c 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -59,6 +59,12 @@ public: this->running = running; lock.unlock(); running_cv.notify_all(); + if (!running) { + running_wait.Set(); + /// Wait until effectively paused + while (running_guard) + ; + } } /** @@ -84,6 +90,8 @@ private: std::atomic_bool stop_run{false}; std::mutex running_mutex; std::condition_variable running_cv; + Common::Event running_wait{}; + std::atomic_bool running_guard{false}; signals: /** diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index b08b87426..1b2b1b2bb 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -13,17 +13,20 @@ #include "input_common/udp/client.h" #include "yuzu/configuration/config.h" -Config::Config() { +Config::Config(const std::string& config_file, bool is_global) { // TODO: Don't hardcode the path; let the frontend decide where to put the config files. - qt_config_loc = FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + "qt-config.ini"; + qt_config_loc = FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + config_file; FileUtil::CreateFullPath(qt_config_loc); qt_config = std::make_unique<QSettings>(QString::fromStdString(qt_config_loc), QSettings::IniFormat); + global = is_global; Reload(); } Config::~Config() { - Save(); + if (global) { + Save(); + } } const std::array<int, Settings::NativeButton::NumButtons> Config::default_buttons = { @@ -211,8 +214,8 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default // This must be in alphabetical order according to action name as it must have the same order as // UISetting::values.shortcuts, which is alphabetically ordered. // clang-format off -const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{ - {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}}, +const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{ + {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}}, {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, {QStringLiteral("Decrease Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("-"), Qt::ApplicationShortcut}}, @@ -220,8 +223,9 @@ const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{ {QStringLiteral("Exit yuzu"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Q"), Qt::WindowShortcut}}, {QStringLiteral("Fullscreen"), QStringLiteral("Main Window"), {QStringLiteral("F11"), Qt::WindowShortcut}}, {QStringLiteral("Increase Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}}, - {QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}}, - {QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}}, + {QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::WidgetWithChildrenShortcut}}, + {QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WidgetWithChildrenShortcut}}, + {QStringLiteral("Mute Audio"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+M"), Qt::WindowShortcut}}, {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, @@ -401,16 +405,19 @@ void Config::ApplyDefaultProfileIfInputInvalid() { void Config::ReadAudioValues() { qt_config->beginGroup(QStringLiteral("Audio")); - Settings::values.sink_id = ReadSetting(QStringLiteral("output_engine"), QStringLiteral("auto")) - .toString() - .toStdString(); - Settings::values.enable_audio_stretching = - ReadSetting(QStringLiteral("enable_audio_stretching"), true).toBool(); - Settings::values.audio_device_id = - ReadSetting(QStringLiteral("output_device"), QStringLiteral("auto")) - .toString() - .toStdString(); - Settings::values.volume = ReadSetting(QStringLiteral("volume"), 1).toFloat(); + if (global) { + Settings::values.sink_id = + ReadSetting(QStringLiteral("output_engine"), QStringLiteral("auto")) + .toString() + .toStdString(); + Settings::values.audio_device_id = + ReadSetting(QStringLiteral("output_device"), QStringLiteral("auto")) + .toString() + .toStdString(); + } + ReadSettingGlobal(Settings::values.enable_audio_stretching, + QStringLiteral("enable_audio_stretching"), true); + ReadSettingGlobal(Settings::values.volume, QStringLiteral("volume"), 1); qt_config->endGroup(); } @@ -439,6 +446,8 @@ void Config::ReadControlValues() { .toInt()); Settings::values.udp_pad_index = static_cast<u8>(ReadSetting(QStringLiteral("udp_pad_index"), 0).toUInt()); + Settings::values.use_docked_mode = + ReadSetting(QStringLiteral("use_docked_mode"), false).toBool(); qt_config->endGroup(); } @@ -446,7 +455,7 @@ void Config::ReadControlValues() { void Config::ReadCoreValues() { qt_config->beginGroup(QStringLiteral("Core")); - Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); + ReadSettingGlobal(Settings::values.use_multi_core, QStringLiteral("use_multi_core"), false); qt_config->endGroup(); } @@ -533,6 +542,8 @@ void Config::ReadDebuggingValues() { Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); Settings::values.disable_cpu_opt = ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool(); + Settings::values.disable_macro_jit = + ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool(); qt_config->endGroup(); } @@ -625,34 +636,28 @@ void Config::ReadPathValues() { void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); - Settings::values.renderer_backend = - static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); - Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); - Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); - Settings::values.resolution_factor = - ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); - Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); - Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); - Settings::values.use_frame_limit = - ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); - Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); - Settings::values.use_disk_shader_cache = - ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); - const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); - Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); - Settings::values.use_asynchronous_gpu_emulation = - ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); - Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); - Settings::values.use_assembly_shaders = - ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool(); - Settings::values.use_fast_gpu_time = - ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); - Settings::values.force_30fps_mode = - ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); - - Settings::values.bg_red = ReadSetting(QStringLiteral("bg_red"), 0.0).toFloat(); - Settings::values.bg_green = ReadSetting(QStringLiteral("bg_green"), 0.0).toFloat(); - Settings::values.bg_blue = ReadSetting(QStringLiteral("bg_blue"), 0.0).toFloat(); + ReadSettingGlobal(Settings::values.renderer_backend, QStringLiteral("backend"), 0); + ReadSettingGlobal(Settings::values.renderer_debug, QStringLiteral("debug"), false); + ReadSettingGlobal(Settings::values.vulkan_device, QStringLiteral("vulkan_device"), 0); + ReadSettingGlobal(Settings::values.aspect_ratio, QStringLiteral("aspect_ratio"), 0); + ReadSettingGlobal(Settings::values.max_anisotropy, QStringLiteral("max_anisotropy"), 0); + ReadSettingGlobal(Settings::values.use_frame_limit, QStringLiteral("use_frame_limit"), true); + ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); + ReadSettingGlobal(Settings::values.use_disk_shader_cache, + QStringLiteral("use_disk_shader_cache"), true); + ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); + ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, + QStringLiteral("use_asynchronous_gpu_emulation"), false); + ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); + ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), + false); + ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), + true); + ReadSettingGlobal(Settings::values.force_30fps_mode, QStringLiteral("force_30fps_mode"), false); + + ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0); + ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0); + ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0); qt_config->endGroup(); } @@ -664,11 +669,13 @@ void Config::ReadShortcutValues() { const auto& [keyseq, context] = shortcut; qt_config->beginGroup(group); qt_config->beginGroup(name); + // No longer using ReadSetting for shortcut.second as it innacurately returns a value of 1 + // for WidgetWithChildrenShortcut which is a value of 3. Needed to fix shortcuts the open + // a file dialog in windowed mode UISettings::values.shortcuts.push_back( {name, group, - {ReadSetting(QStringLiteral("KeySeq"), keyseq).toString(), - ReadSetting(QStringLiteral("Context"), context).toInt()}}); + {ReadSetting(QStringLiteral("KeySeq"), keyseq).toString(), shortcut.second}}); qt_config->endGroup(); qt_config->endGroup(); } @@ -679,35 +686,45 @@ void Config::ReadShortcutValues() { void Config::ReadSystemValues() { qt_config->beginGroup(QStringLiteral("System")); - Settings::values.use_docked_mode = - ReadSetting(QStringLiteral("use_docked_mode"), false).toBool(); - - Settings::values.current_user = std::clamp<int>( - ReadSetting(QStringLiteral("current_user"), 0).toInt(), 0, Service::Account::MAX_USERS - 1); + ReadSettingGlobal(Settings::values.current_user, QStringLiteral("current_user"), 0); + Settings::values.current_user = + std::clamp<int>(Settings::values.current_user, 0, Service::Account::MAX_USERS - 1); - Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt(); + ReadSettingGlobal(Settings::values.language_index, QStringLiteral("language_index"), 1); - Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt(); + ReadSettingGlobal(Settings::values.region_index, QStringLiteral("region_index"), 1); - Settings::values.time_zone_index = ReadSetting(QStringLiteral("time_zone_index"), 0).toInt(); + ReadSettingGlobal(Settings::values.time_zone_index, QStringLiteral("time_zone_index"), 0); - const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); - if (rng_seed_enabled) { - Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); - } else { - Settings::values.rng_seed = std::nullopt; + bool rng_seed_enabled; + ReadSettingGlobal(rng_seed_enabled, QStringLiteral("rng_seed_enabled"), false); + bool rng_seed_global = + global || qt_config->value(QStringLiteral("rng_seed/use_global"), true).toBool(); + Settings::values.rng_seed.SetGlobal(rng_seed_global); + if (global || !rng_seed_global) { + if (rng_seed_enabled) { + Settings::values.rng_seed.SetValue( + ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong()); + } else { + Settings::values.rng_seed.SetValue(std::nullopt); + } } - const auto custom_rtc_enabled = - ReadSetting(QStringLiteral("custom_rtc_enabled"), false).toBool(); - if (custom_rtc_enabled) { - Settings::values.custom_rtc = - std::chrono::seconds(ReadSetting(QStringLiteral("custom_rtc"), 0).toULongLong()); - } else { - Settings::values.custom_rtc = std::nullopt; + bool custom_rtc_enabled; + ReadSettingGlobal(custom_rtc_enabled, QStringLiteral("custom_rtc_enabled"), false); + bool custom_rtc_global = + global || qt_config->value(QStringLiteral("custom_rtc/use_global"), true).toBool(); + Settings::values.custom_rtc.SetGlobal(custom_rtc_global); + if (global || !custom_rtc_global) { + if (custom_rtc_enabled) { + Settings::values.custom_rtc.SetValue( + std::chrono::seconds(ReadSetting(QStringLiteral("custom_rtc"), 0).toULongLong())); + } else { + Settings::values.custom_rtc.SetValue(std::nullopt); + } } - Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt(); + ReadSettingGlobal(Settings::values.sound_index, QStringLiteral("sound_index"), 1); qt_config->endGroup(); } @@ -720,8 +737,6 @@ void Config::ReadUIValues() { .toString(); UISettings::values.enable_discord_presence = ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); - UISettings::values.screenshot_resolution_factor = - static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt()); UISettings::values.select_user_on_boot = ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); @@ -803,18 +818,20 @@ void Config::ReadWebServiceValues() { } void Config::ReadValues() { - ReadControlValues(); + if (global) { + ReadControlValues(); + ReadDataStorageValues(); + ReadDebuggingValues(); + ReadDisabledAddOnValues(); + ReadServiceValues(); + ReadUIValues(); + ReadWebServiceValues(); + ReadMiscellaneousValues(); + } ReadCoreValues(); ReadRendererValues(); ReadAudioValues(); - ReadDataStorageValues(); ReadSystemValues(); - ReadMiscellaneousValues(); - ReadDebuggingValues(); - ReadWebServiceValues(); - ReadServiceValues(); - ReadDisabledAddOnValues(); - ReadUIValues(); } void Config::SavePlayerValues() { @@ -901,30 +918,35 @@ void Config::SaveTouchscreenValues() { } void Config::SaveValues() { - SaveControlValues(); + if (global) { + SaveControlValues(); + SaveDataStorageValues(); + SaveDebuggingValues(); + SaveDisabledAddOnValues(); + SaveServiceValues(); + SaveUIValues(); + SaveWebServiceValues(); + SaveMiscellaneousValues(); + } SaveCoreValues(); SaveRendererValues(); SaveAudioValues(); - SaveDataStorageValues(); SaveSystemValues(); - SaveMiscellaneousValues(); - SaveDebuggingValues(); - SaveWebServiceValues(); - SaveServiceValues(); - SaveDisabledAddOnValues(); - SaveUIValues(); } void Config::SaveAudioValues() { qt_config->beginGroup(QStringLiteral("Audio")); - WriteSetting(QStringLiteral("output_engine"), QString::fromStdString(Settings::values.sink_id), - QStringLiteral("auto")); - WriteSetting(QStringLiteral("enable_audio_stretching"), - Settings::values.enable_audio_stretching, true); - WriteSetting(QStringLiteral("output_device"), - QString::fromStdString(Settings::values.audio_device_id), QStringLiteral("auto")); - WriteSetting(QStringLiteral("volume"), Settings::values.volume, 1.0f); + if (global) { + WriteSetting(QStringLiteral("output_engine"), + QString::fromStdString(Settings::values.sink_id), QStringLiteral("auto")); + WriteSetting(QStringLiteral("output_device"), + QString::fromStdString(Settings::values.audio_device_id), + QStringLiteral("auto")); + } + WriteSettingGlobal(QStringLiteral("enable_audio_stretching"), + Settings::values.enable_audio_stretching, true); + WriteSettingGlobal(QStringLiteral("volume"), Settings::values.volume, 1.0f); qt_config->endGroup(); } @@ -947,6 +969,7 @@ void Config::SaveControlValues() { WriteSetting(QStringLiteral("udp_input_port"), Settings::values.udp_input_port, InputCommon::CemuhookUDP::DEFAULT_PORT); WriteSetting(QStringLiteral("udp_pad_index"), Settings::values.udp_pad_index, 0); + WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); qt_config->endGroup(); } @@ -954,7 +977,7 @@ void Config::SaveControlValues() { void Config::SaveCoreValues() { qt_config->beginGroup(QStringLiteral("Core")); - WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); + WriteSettingGlobal(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); qt_config->endGroup(); } @@ -1011,6 +1034,7 @@ void Config::SaveDebuggingValues() { WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false); + WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false); qt_config->endGroup(); } @@ -1076,31 +1100,34 @@ void Config::SavePathValues() { void Config::SaveRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); - WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); + WriteSettingGlobal(QStringLiteral("backend"), + static_cast<int>(Settings::values.renderer_backend.GetValue(global)), + Settings::values.renderer_backend.UsingGlobal(), 0); WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); - WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); - WriteSetting(QStringLiteral("resolution_factor"), - static_cast<double>(Settings::values.resolution_factor), 1.0); - WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); - WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); - WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); - WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); - WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, - true); - WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy), - 0); - WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), - Settings::values.use_asynchronous_gpu_emulation, false); - WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); - WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, - false); - WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); - WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); + WriteSettingGlobal(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); + WriteSettingGlobal(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); + WriteSettingGlobal(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); + WriteSettingGlobal(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); + WriteSettingGlobal(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); + WriteSettingGlobal(QStringLiteral("use_disk_shader_cache"), + Settings::values.use_disk_shader_cache, true); + WriteSettingGlobal(QStringLiteral("gpu_accuracy"), + static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), + Settings::values.gpu_accuracy.UsingGlobal(), 0); + WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), + Settings::values.use_asynchronous_gpu_emulation, false); + WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); + WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), + Settings::values.use_assembly_shaders, false); + WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, + true); + WriteSettingGlobal(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, + false); // Cast to double because Qt's written float values are not human-readable - WriteSetting(QStringLiteral("bg_red"), static_cast<double>(Settings::values.bg_red), 0.0); - WriteSetting(QStringLiteral("bg_green"), static_cast<double>(Settings::values.bg_green), 0.0); - WriteSetting(QStringLiteral("bg_blue"), static_cast<double>(Settings::values.bg_blue), 0.0); + WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0); + WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0); + WriteSettingGlobal(QStringLiteral("bg_blue"), Settings::values.bg_blue, 0.0); qt_config->endGroup(); } @@ -1128,23 +1155,28 @@ void Config::SaveShortcutValues() { void Config::SaveSystemValues() { qt_config->beginGroup(QStringLiteral("System")); - WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); - WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); - WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1); - WriteSetting(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0); - - WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); - WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); - - WriteSetting(QStringLiteral("custom_rtc_enabled"), Settings::values.custom_rtc.has_value(), - false); - WriteSetting(QStringLiteral("custom_rtc"), - QVariant::fromValue<long long>( - Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), - 0); - - WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1); + WriteSettingGlobal(QStringLiteral("language_index"), Settings::values.language_index, 1); + WriteSettingGlobal(QStringLiteral("region_index"), Settings::values.region_index, 1); + WriteSettingGlobal(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0); + + WriteSettingGlobal(QStringLiteral("rng_seed_enabled"), + Settings::values.rng_seed.GetValue(global).has_value(), + Settings::values.rng_seed.UsingGlobal(), false); + WriteSettingGlobal(QStringLiteral("rng_seed"), + Settings::values.rng_seed.GetValue(global).value_or(0), + Settings::values.rng_seed.UsingGlobal(), 0); + + WriteSettingGlobal(QStringLiteral("custom_rtc_enabled"), + Settings::values.custom_rtc.GetValue(global).has_value(), + Settings::values.custom_rtc.UsingGlobal(), false); + WriteSettingGlobal( + QStringLiteral("custom_rtc"), + QVariant::fromValue<long long>( + Settings::values.custom_rtc.GetValue(global).value_or(std::chrono::seconds{}).count()), + Settings::values.custom_rtc.UsingGlobal(), 0); + + WriteSettingGlobal(QStringLiteral("sound_index"), Settings::values.sound_index, 1); qt_config->endGroup(); } @@ -1156,8 +1188,6 @@ void Config::SaveUIValues() { QString::fromUtf8(UISettings::themes[0].second)); WriteSetting(QStringLiteral("enable_discord_presence"), UISettings::values.enable_discord_presence, true); - WriteSetting(QStringLiteral("screenshot_resolution_factor"), - UISettings::values.screenshot_resolution_factor, 0); WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, false); @@ -1238,6 +1268,34 @@ QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) return result; } +template <typename Type> +void Config::ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name) { + const bool use_global = qt_config->value(name + QStringLiteral("/use_global"), true).toBool(); + setting.SetGlobal(use_global); + if (global || !use_global) { + setting.SetValue(ReadSetting(name).value<Type>()); + } +} + +template <typename Type> +void Config::ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name, + const QVariant& default_value) { + const bool use_global = qt_config->value(name + QStringLiteral("/use_global"), true).toBool(); + setting.SetGlobal(use_global); + if (global || !use_global) { + setting.SetValue(ReadSetting(name, default_value).value<Type>()); + } +} + +template <typename Type> +void Config::ReadSettingGlobal(Type& setting, const QString& name, + const QVariant& default_value) const { + const bool use_global = qt_config->value(name + QStringLiteral("/use_global"), true).toBool(); + if (global || !use_global) { + setting = ReadSetting(name, default_value).value<Type>(); + } +} + void Config::WriteSetting(const QString& name, const QVariant& value) { qt_config->setValue(name, value); } @@ -1248,6 +1306,40 @@ void Config::WriteSetting(const QString& name, const QVariant& value, qt_config->setValue(name, value); } +template <typename Type> +void Config::WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting) { + if (!global) { + qt_config->setValue(name + QStringLiteral("/use_global"), setting.UsingGlobal()); + } + if (global || !setting.UsingGlobal()) { + qt_config->setValue(name, setting.GetValue(global)); + } +} + +template <typename Type> +void Config::WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting, + const QVariant& default_value) { + if (!global) { + qt_config->setValue(name + QStringLiteral("/use_global"), setting.UsingGlobal()); + } + if (global || !setting.UsingGlobal()) { + qt_config->setValue(name + QStringLiteral("/default"), + setting.GetValue(global) == default_value.value<Type>()); + qt_config->setValue(name, setting.GetValue(global)); + } +} + +void Config::WriteSettingGlobal(const QString& name, const QVariant& value, bool use_global, + const QVariant& default_value) { + if (!global) { + qt_config->setValue(name + QStringLiteral("/use_global"), use_global); + } + if (global || !use_global) { + qt_config->setValue(name + QStringLiteral("/default"), value == default_value); + qt_config->setValue(name, value); + } +} + void Config::Reload() { ReadValues(); // To apply default value changes diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 5cd2a5feb..681f0bca5 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -7,6 +7,7 @@ #include <array> #include <memory> #include <string> +#include <QMetaType> #include <QVariant> #include "core/settings.h" #include "yuzu/uisettings.h" @@ -15,7 +16,7 @@ class QSettings; class Config { public: - Config(); + explicit Config(const std::string& config_loc = "qt-config.ini", bool is_global = true); ~Config(); void Reload(); @@ -27,7 +28,7 @@ public: default_mouse_buttons; static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys; static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; - static const std::array<UISettings::Shortcut, 15> default_hotkeys; + static const std::array<UISettings::Shortcut, 16> default_hotkeys; private: void ReadValues(); @@ -82,9 +83,33 @@ private: QVariant ReadSetting(const QString& name) const; QVariant ReadSetting(const QString& name, const QVariant& default_value) const; + // Templated ReadSettingGlobal functions will also look for the use_global setting and set + // both the value and the global state properly + template <typename Type> + void ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name); + template <typename Type> + void ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name, + const QVariant& default_value); + template <typename Type> + void ReadSettingGlobal(Type& setting, const QString& name, const QVariant& default_value) const; + // Templated WriteSettingGlobal functions will also write the global state if needed and will + // skip writing the actual setting if it defers to the global value void WriteSetting(const QString& name, const QVariant& value); void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value); + template <typename Type> + void WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting); + template <typename Type> + void WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting, + const QVariant& default_value); + void WriteSettingGlobal(const QString& name, const QVariant& value, bool use_global, + const QVariant& default_value); std::unique_ptr<QSettings> qt_config; std::string qt_config_loc; + + bool global; }; + +// These metatype declarations cannot be in core/settings.h because core is devoid of QT +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::GPUAccuracy); diff --git a/src/yuzu/configuration/configuration_shared.cpp b/src/yuzu/configuration/configuration_shared.cpp new file mode 100644 index 000000000..bb47c3933 --- /dev/null +++ b/src/yuzu/configuration/configuration_shared.cpp @@ -0,0 +1,76 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <QCheckBox> +#include <QComboBox> +#include "core/settings.h" +#include "yuzu/configuration/configuration_shared.h" +#include "yuzu/configuration/configure_per_game.h" + +void ConfigurationShared::ApplyPerGameSetting(Settings::Setting<bool>* setting, + const QCheckBox* checkbox) { + if (checkbox->checkState() == Qt::PartiallyChecked) { + setting->SetGlobal(true); + } else { + setting->SetGlobal(false); + setting->SetValue(checkbox->checkState() == Qt::Checked); + } +} + +void ConfigurationShared::ApplyPerGameSetting(Settings::Setting<int>* setting, + const QComboBox* combobox) { + if (combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + setting->SetGlobal(true); + } else { + setting->SetGlobal(false); + setting->SetValue(combobox->currentIndex() - ConfigurationShared::USE_GLOBAL_OFFSET); + } +} + +void ConfigurationShared::ApplyPerGameSetting(Settings::Setting<Settings::RendererBackend>* setting, + const QComboBox* combobox) { + if (combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + setting->SetGlobal(true); + } else { + setting->SetGlobal(false); + setting->SetValue(static_cast<Settings::RendererBackend>( + combobox->currentIndex() - ConfigurationShared::USE_GLOBAL_OFFSET)); + } +} + +void ConfigurationShared::SetPerGameSetting(QCheckBox* checkbox, + const Settings::Setting<bool>* setting) { + if (setting->UsingGlobal()) { + checkbox->setCheckState(Qt::PartiallyChecked); + } else { + checkbox->setCheckState(setting->GetValue() ? Qt::Checked : Qt::Unchecked); + } +} + +void ConfigurationShared::SetPerGameSetting(QComboBox* combobox, + const Settings::Setting<int>* setting) { + combobox->setCurrentIndex(setting->UsingGlobal() + ? ConfigurationShared::USE_GLOBAL_INDEX + : setting->GetValue() + ConfigurationShared::USE_GLOBAL_OFFSET); +} + +void ConfigurationShared::SetPerGameSetting( + QComboBox* combobox, const Settings::Setting<Settings::RendererBackend>* setting) { + combobox->setCurrentIndex(setting->UsingGlobal() ? ConfigurationShared::USE_GLOBAL_INDEX + : static_cast<int>(setting->GetValue()) + + ConfigurationShared::USE_GLOBAL_OFFSET); +} + +void ConfigurationShared::SetPerGameSetting( + QComboBox* combobox, const Settings::Setting<Settings::GPUAccuracy>* setting) { + combobox->setCurrentIndex(setting->UsingGlobal() ? ConfigurationShared::USE_GLOBAL_INDEX + : static_cast<int>(setting->GetValue()) + + ConfigurationShared::USE_GLOBAL_OFFSET); +} + +void ConfigurationShared::InsertGlobalItem(QComboBox* combobox) { + const QString use_global_text = ConfigurePerGame::tr("Use global configuration"); + combobox->insertItem(ConfigurationShared::USE_GLOBAL_INDEX, use_global_text); + combobox->insertSeparator(ConfigurationShared::USE_GLOBAL_SEPARATOR_INDEX); +} diff --git a/src/yuzu/configuration/configuration_shared.h b/src/yuzu/configuration/configuration_shared.h new file mode 100644 index 000000000..b11b1b950 --- /dev/null +++ b/src/yuzu/configuration/configuration_shared.h @@ -0,0 +1,36 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <QCheckBox> +#include <QComboBox> +#include <QString> +#include "core/settings.h" + +namespace ConfigurationShared { + +constexpr int USE_GLOBAL_INDEX = 0; +constexpr int USE_GLOBAL_SEPARATOR_INDEX = 1; +constexpr int USE_GLOBAL_OFFSET = 2; + +// Global-aware apply and set functions + +void ApplyPerGameSetting(Settings::Setting<bool>* setting, const QCheckBox* checkbox); +void ApplyPerGameSetting(Settings::Setting<int>* setting, const QComboBox* combobox); +void ApplyPerGameSetting(Settings::Setting<Settings::RendererBackend>* setting, + const QComboBox* combobox); +void ApplyPerGameSetting(Settings::Setting<Settings::GPUAccuracy>* setting, + const QComboBox* combobox); + +void SetPerGameSetting(QCheckBox* checkbox, const Settings::Setting<bool>* setting); +void SetPerGameSetting(QComboBox* combobox, const Settings::Setting<int>* setting); +void SetPerGameSetting(QComboBox* combobox, + const Settings::Setting<Settings::RendererBackend>* setting); +void SetPerGameSetting(QComboBox* combobox, + const Settings::Setting<Settings::GPUAccuracy>* setting); + +void InsertGlobalItem(QComboBox* combobox); + +} // namespace ConfigurationShared diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp index f370c690f..cc021beec 100644 --- a/src/yuzu/configuration/configure_audio.cpp +++ b/src/yuzu/configuration/configure_audio.cpp @@ -11,6 +11,7 @@ #include "core/core.h" #include "core/settings.h" #include "ui_configure_audio.h" +#include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_audio.h" ConfigureAudio::ConfigureAudio(QWidget* parent) @@ -24,6 +25,11 @@ ConfigureAudio::ConfigureAudio(QWidget* parent) connect(ui->output_sink_combo_box, qOverload<int>(&QComboBox::currentIndexChanged), this, &ConfigureAudio::UpdateAudioDevices); + ui->volume_label->setVisible(Settings::configuring_global); + ui->volume_combo_box->setVisible(!Settings::configuring_global); + + SetupPerGameUI(); + SetConfiguration(); const bool is_powered_on = Core::System::GetInstance().IsPoweredOn(); @@ -41,8 +47,22 @@ void ConfigureAudio::SetConfiguration() { SetAudioDeviceFromDeviceID(); - ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching); - ui->volume_slider->setValue(Settings::values.volume * ui->volume_slider->maximum()); + ui->volume_slider->setValue(Settings::values.volume.GetValue() * ui->volume_slider->maximum()); + + if (Settings::configuring_global) { + ui->toggle_audio_stretching->setChecked( + Settings::values.enable_audio_stretching.GetValue()); + } else { + ConfigurationShared::SetPerGameSetting(ui->toggle_audio_stretching, + &Settings::values.enable_audio_stretching); + if (Settings::values.volume.UsingGlobal()) { + ui->volume_combo_box->setCurrentIndex(0); + ui->volume_slider->setEnabled(false); + } else { + ui->volume_combo_box->setCurrentIndex(1); + ui->volume_slider->setEnabled(true); + } + } SetVolumeIndicatorText(ui->volume_slider->sliderPosition()); } @@ -80,15 +100,36 @@ void ConfigureAudio::SetVolumeIndicatorText(int percentage) { } void ConfigureAudio::ApplyConfiguration() { - Settings::values.sink_id = - ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()) - .toStdString(); - Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked(); - Settings::values.audio_device_id = - ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex()) - .toStdString(); - Settings::values.volume = - static_cast<float>(ui->volume_slider->sliderPosition()) / ui->volume_slider->maximum(); + if (Settings::configuring_global) { + Settings::values.sink_id = + ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()) + .toStdString(); + Settings::values.audio_device_id = + ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex()) + .toStdString(); + + // Guard if during game and set to game-specific value + if (Settings::values.enable_audio_stretching.UsingGlobal()) { + Settings::values.enable_audio_stretching.SetValue( + ui->toggle_audio_stretching->isChecked()); + } + if (Settings::values.volume.UsingGlobal()) { + Settings::values.volume.SetValue( + static_cast<float>(ui->volume_slider->sliderPosition()) / + ui->volume_slider->maximum()); + } + } else { + ConfigurationShared::ApplyPerGameSetting(&Settings::values.enable_audio_stretching, + ui->toggle_audio_stretching); + if (ui->volume_combo_box->currentIndex() == 0) { + Settings::values.volume.SetGlobal(true); + } else { + Settings::values.volume.SetGlobal(false); + Settings::values.volume.SetValue( + static_cast<float>(ui->volume_slider->sliderPosition()) / + ui->volume_slider->maximum()); + } + } } void ConfigureAudio::changeEvent(QEvent* event) { @@ -122,3 +163,22 @@ void ConfigureAudio::RetranslateUI() { ui->retranslateUi(this); SetVolumeIndicatorText(ui->volume_slider->sliderPosition()); } + +void ConfigureAudio::SetupPerGameUI() { + if (Settings::configuring_global) { + ui->volume_slider->setEnabled(Settings::values.volume.UsingGlobal()); + ui->toggle_audio_stretching->setEnabled( + Settings::values.enable_audio_stretching.UsingGlobal()); + + return; + } + + ui->toggle_audio_stretching->setTristate(true); + connect(ui->volume_combo_box, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), + this, [this](int index) { ui->volume_slider->setEnabled(index == 1); }); + + ui->output_sink_combo_box->setVisible(false); + ui->output_sink_label->setVisible(false); + ui->audio_device_combo_box->setVisible(false); + ui->audio_device_label->setVisible(false); +} diff --git a/src/yuzu/configuration/configure_audio.h b/src/yuzu/configuration/configure_audio.h index ea83bd72d..d84f4a682 100644 --- a/src/yuzu/configuration/configure_audio.h +++ b/src/yuzu/configuration/configure_audio.h @@ -34,5 +34,7 @@ private: void SetAudioDeviceFromDeviceID(); void SetVolumeIndicatorText(int percentage); + void SetupPerGameUI(); + std::unique_ptr<Ui::ConfigureAudio> ui; }; diff --git a/src/yuzu/configuration/configure_audio.ui b/src/yuzu/configuration/configure_audio.ui index a098b9acc..862ccb988 100644 --- a/src/yuzu/configuration/configure_audio.ui +++ b/src/yuzu/configuration/configure_audio.ui @@ -6,8 +6,8 @@ <rect> <x>0</x> <y>0</y> - <width>188</width> - <height>246</height> + <width>367</width> + <height>368</height> </rect> </property> <layout class="QVBoxLayout"> @@ -18,9 +18,9 @@ </property> <layout class="QVBoxLayout"> <item> - <layout class="QHBoxLayout"> + <layout class="QHBoxLayout" name="_3"> <item> - <widget class="QLabel" name="label_1"> + <widget class="QLabel" name="output_sink_label"> <property name="text"> <string>Output Engine:</string> </property> @@ -31,20 +31,20 @@ </item> </layout> </item> - <item> - <widget class="QCheckBox" name="toggle_audio_stretching"> - <property name="toolTip"> - <string>This post-processing effect adjusts audio speed to match emulation speed and helps prevent audio stutter. This however increases audio latency.</string> - </property> - <property name="text"> - <string>Enable audio stretching</string> - </property> - </widget> - </item> <item> - <layout class="QHBoxLayout"> + <widget class="QCheckBox" name="toggle_audio_stretching"> + <property name="toolTip"> + <string>This post-processing effect adjusts audio speed to match emulation speed and helps prevent audio stutter. This however increases audio latency.</string> + </property> + <property name="text"> + <string>Enable audio stretching</string> + </property> + </widget> + </item> + <item> + <layout class="QHBoxLayout" name="_2"> <item> - <widget class="QLabel" name="label_2"> + <widget class="QLabel" name="audio_device_label"> <property name="text"> <string>Audio Device:</string> </property> @@ -61,7 +61,21 @@ <number>0</number> </property> <item> - <widget class="QLabel" name="label_3"> + <widget class="QComboBox" name="volume_combo_box"> + <item> + <property name="text"> + <string>Use global volume</string> + </property> + </item> + <item> + <property name="text"> + <string>Set volume:</string> + </property> + </item> + </widget> + </item> + <item> + <widget class="QLabel" name="volume_label"> <property name="text"> <string>Volume:</string> </property> @@ -74,7 +88,7 @@ </property> <property name="sizeHint" stdset="0"> <size> - <width>40</width> + <width>30</width> <height>20</height> </size> </property> diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index c2026763e..2c77441fd 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -39,6 +39,8 @@ void ConfigureDebug::SetConfiguration() { ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt); ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); + ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit); } void ConfigureDebug::ApplyConfiguration() { @@ -51,6 +53,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.quest_flag = ui->quest_flag->isChecked(); Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); + Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Debugger::ToggleConsole(); Log::Filter filter; filter.ParseFilterString(Settings::values.log_filter); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index e0d4c4a44..46f0208c6 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -148,6 +148,19 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="disable_macro_jit"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="whatsThis"> + <string>When checked, it disables the macro Just In Time compiler. Enabled this makes games run slower</string> + </property> + <property name="text"> + <string>Disable Macro JIT</string> + </property> + </widget> + </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index df4473b46..5918e9972 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -14,6 +14,8 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry) : QDialog(parent), ui(new Ui::ConfigureDialog), registry(registry) { + Settings::configuring_global = true; + ui->setupUi(this); ui->hotkeysTab->Populate(registry); setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint); diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index cb95423e0..1fb62d1cf 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp @@ -7,40 +7,79 @@ #include "core/core.h" #include "core/settings.h" #include "ui_configure_general.h" +#include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_general.h" #include "yuzu/uisettings.h" ConfigureGeneral::ConfigureGeneral(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureGeneral) { - ui->setupUi(this); + SetupPerGameUI(); + SetConfiguration(); - connect(ui->toggle_frame_limit, &QCheckBox::toggled, ui->frame_limit, &QSpinBox::setEnabled); + connect(ui->toggle_frame_limit, &QCheckBox::stateChanged, ui->frame_limit, [this]() { + ui->frame_limit->setEnabled(ui->toggle_frame_limit->checkState() == Qt::Checked); + }); } ConfigureGeneral::~ConfigureGeneral() = default; void ConfigureGeneral::SetConfiguration() { + const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + + ui->use_multi_core->setEnabled(runtime_lock); + ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); + ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing); ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background); ui->toggle_hide_mouse->setChecked(UISettings::values.hide_mouse); - ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); - ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked()); - ui->frame_limit->setValue(Settings::values.frame_limit); + ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit.GetValue()); + ui->frame_limit->setValue(Settings::values.frame_limit.GetValue()); + + if (!Settings::configuring_global) { + if (Settings::values.use_multi_core.UsingGlobal()) { + ui->use_multi_core->setCheckState(Qt::PartiallyChecked); + } + if (Settings::values.use_frame_limit.UsingGlobal()) { + ui->toggle_frame_limit->setCheckState(Qt::PartiallyChecked); + } + } + + ui->frame_limit->setEnabled(ui->toggle_frame_limit->checkState() == Qt::Checked && + ui->toggle_frame_limit->isEnabled()); } void ConfigureGeneral::ApplyConfiguration() { - UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); - UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); - UISettings::values.pause_when_in_background = ui->toggle_background_pause->isChecked(); - UISettings::values.hide_mouse = ui->toggle_hide_mouse->isChecked(); + if (Settings::configuring_global) { + UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); + UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); + UISettings::values.pause_when_in_background = ui->toggle_background_pause->isChecked(); + UISettings::values.hide_mouse = ui->toggle_hide_mouse->isChecked(); + + // Guard if during game and set to game-specific value + if (Settings::values.use_frame_limit.UsingGlobal()) { + Settings::values.use_frame_limit.SetValue(ui->toggle_frame_limit->checkState() == + Qt::Checked); + Settings::values.frame_limit.SetValue(ui->frame_limit->value()); + Settings::values.use_multi_core.SetValue(ui->use_multi_core->isChecked()); + } + } else { + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, + ui->use_multi_core); - Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); - Settings::values.frame_limit = ui->frame_limit->value(); + bool global_frame_limit = ui->toggle_frame_limit->checkState() == Qt::PartiallyChecked; + Settings::values.use_frame_limit.SetGlobal(global_frame_limit); + Settings::values.frame_limit.SetGlobal(global_frame_limit); + if (!global_frame_limit) { + Settings::values.use_frame_limit.SetValue(ui->toggle_frame_limit->checkState() == + Qt::Checked); + Settings::values.frame_limit.SetValue(ui->frame_limit->value()); + } + } } void ConfigureGeneral::changeEvent(QEvent* event) { @@ -54,3 +93,20 @@ void ConfigureGeneral::changeEvent(QEvent* event) { void ConfigureGeneral::RetranslateUI() { ui->retranslateUi(this); } + +void ConfigureGeneral::SetupPerGameUI() { + if (Settings::configuring_global) { + ui->toggle_frame_limit->setEnabled(Settings::values.use_frame_limit.UsingGlobal()); + ui->frame_limit->setEnabled(Settings::values.frame_limit.UsingGlobal()); + + return; + } + + ui->toggle_check_exit->setVisible(false); + ui->toggle_user_on_boot->setVisible(false); + ui->toggle_background_pause->setVisible(false); + ui->toggle_hide_mouse->setVisible(false); + + ui->toggle_frame_limit->setTristate(true); + ui->use_multi_core->setTristate(true); +} diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h index ef05ce065..9c785c22e 100644 --- a/src/yuzu/configuration/configure_general.h +++ b/src/yuzu/configuration/configure_general.h @@ -28,5 +28,7 @@ private: void SetConfiguration(); + void SetupPerGameUI(); + std::unique_ptr<Ui::ConfigureGeneral> ui; }; diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index fc3b7e65a..2711116a2 100644 --- a/src/yuzu/configuration/configure_general.ui +++ b/src/yuzu/configuration/configure_general.ui @@ -52,6 +52,13 @@ </layout> </item> <item> + <widget class="QCheckBox" name="use_multi_core"> + <property name="text"> + <string>Multicore CPU Emulation</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="toggle_check_exit"> <property name="text"> <string>Confirm exit while emulation is running</string> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index ea667caef..cb4706bd6 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -13,65 +13,27 @@ #include "core/core.h" #include "core/settings.h" #include "ui_configure_graphics.h" +#include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_graphics.h" #ifdef HAS_VULKAN #include "video_core/renderer_vulkan/renderer_vulkan.h" #endif -namespace { -enum class Resolution : int { - Auto, - Scale1x, - Scale2x, - Scale3x, - Scale4x, -}; - -float ToResolutionFactor(Resolution option) { - switch (option) { - case Resolution::Auto: - return 0.f; - case Resolution::Scale1x: - return 1.f; - case Resolution::Scale2x: - return 2.f; - case Resolution::Scale3x: - return 3.f; - case Resolution::Scale4x: - return 4.f; - } - return 0.f; -} - -Resolution FromResolutionFactor(float factor) { - if (factor == 0.f) { - return Resolution::Auto; - } else if (factor == 1.f) { - return Resolution::Scale1x; - } else if (factor == 2.f) { - return Resolution::Scale2x; - } else if (factor == 3.f) { - return Resolution::Scale3x; - } else if (factor == 4.f) { - return Resolution::Scale4x; - } - return Resolution::Auto; -} -} // Anonymous namespace - ConfigureGraphics::ConfigureGraphics(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureGraphics) { - vulkan_device = Settings::values.vulkan_device; + vulkan_device = Settings::values.vulkan_device.GetValue(); RetrieveVulkanDevices(); ui->setupUi(this); + SetupPerGameUI(); + SetConfiguration(); - connect(ui->api, static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this, + connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { UpdateDeviceComboBox(); }); - connect(ui->device, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this, + connect(ui->device, qOverload<int>(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { @@ -81,6 +43,9 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) } UpdateBackgroundColorButton(new_bg_color); }); + + ui->bg_label->setVisible(Settings::configuring_global); + ui->bg_combobox->setVisible(!Settings::configuring_global); } void ConfigureGraphics::UpdateDeviceSelection(int device) { @@ -98,31 +63,95 @@ void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->api->setEnabled(runtime_lock); - ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); - ui->resolution_factor_combobox->setCurrentIndex( - static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); - ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); - ui->use_disk_shader_cache->setEnabled(runtime_lock); - ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); - ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); - UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, - Settings::values.bg_blue)); + ui->use_disk_shader_cache->setEnabled(runtime_lock); + + if (Settings::configuring_global) { + ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); + ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); + ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); + ui->use_asynchronous_gpu_emulation->setChecked( + Settings::values.use_asynchronous_gpu_emulation.GetValue()); + } else { + ConfigurationShared::SetPerGameSetting(ui->use_disk_shader_cache, + &Settings::values.use_disk_shader_cache); + ConfigurationShared::SetPerGameSetting(ui->use_asynchronous_gpu_emulation, + &Settings::values.use_asynchronous_gpu_emulation); + + ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); + ConfigurationShared::SetPerGameSetting(ui->aspect_ratio_combobox, + &Settings::values.aspect_ratio); + + ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); + ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); + } + + UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red.GetValue(), + Settings::values.bg_green.GetValue(), + Settings::values.bg_blue.GetValue())); UpdateDeviceComboBox(); } void ConfigureGraphics::ApplyConfiguration() { - Settings::values.renderer_backend = GetCurrentGraphicsBackend(); - Settings::values.vulkan_device = vulkan_device; - Settings::values.resolution_factor = - ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); - Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); - Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); - Settings::values.use_asynchronous_gpu_emulation = - ui->use_asynchronous_gpu_emulation->isChecked(); - Settings::values.bg_red = static_cast<float>(bg_color.redF()); - Settings::values.bg_green = static_cast<float>(bg_color.greenF()); - Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); + if (Settings::configuring_global) { + // Guard if during game and set to game-specific value + if (Settings::values.renderer_backend.UsingGlobal()) { + Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); + } + if (Settings::values.vulkan_device.UsingGlobal()) { + Settings::values.vulkan_device.SetValue(vulkan_device); + } + if (Settings::values.aspect_ratio.UsingGlobal()) { + Settings::values.aspect_ratio.SetValue(ui->aspect_ratio_combobox->currentIndex()); + } + if (Settings::values.use_disk_shader_cache.UsingGlobal()) { + Settings::values.use_disk_shader_cache.SetValue(ui->use_disk_shader_cache->isChecked()); + } + if (Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()) { + Settings::values.use_asynchronous_gpu_emulation.SetValue( + ui->use_asynchronous_gpu_emulation->isChecked()); + } + if (Settings::values.bg_red.UsingGlobal()) { + Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF())); + Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF())); + Settings::values.bg_blue.SetValue(static_cast<float>(bg_color.blueF())); + } + } else { + if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.renderer_backend.SetGlobal(true); + Settings::values.vulkan_device.SetGlobal(true); + } else { + Settings::values.renderer_backend.SetGlobal(false); + Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + Settings::values.vulkan_device.SetGlobal(false); + Settings::values.vulkan_device.SetValue(vulkan_device); + } else { + Settings::values.vulkan_device.SetGlobal(true); + } + } + + ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, + ui->aspect_ratio_combobox); + + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_disk_shader_cache, + ui->use_disk_shader_cache); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, + ui->use_asynchronous_gpu_emulation); + + if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.bg_red.SetGlobal(true); + Settings::values.bg_green.SetGlobal(true); + Settings::values.bg_blue.SetGlobal(true); + } else { + Settings::values.bg_red.SetGlobal(false); + Settings::values.bg_green.SetGlobal(false); + Settings::values.bg_blue.SetGlobal(false); + Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF())); + Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF())); + Settings::values.bg_blue.SetValue(static_cast<float>(bg_color.blueF())); + } + } } void ConfigureGraphics::changeEvent(QEvent* event) { @@ -151,19 +180,27 @@ void ConfigureGraphics::UpdateDeviceComboBox() { ui->device->clear(); bool enabled = false; + + if (!Settings::configuring_global && + ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(); + } switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: ui->device->addItem(tr("OpenGL Graphics Device")); enabled = false; break; case Settings::RendererBackend::Vulkan: - for (const auto device : vulkan_devices) { + for (const auto& device : vulkan_devices) { ui->device->addItem(device); } ui->device->setCurrentIndex(vulkan_device); enabled = !vulkan_devices.empty(); break; } + // If in per-game config and use global is selected, don't enable. + enabled &= !(!Settings::configuring_global && + ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } @@ -177,5 +214,37 @@ void ConfigureGraphics::RetrieveVulkanDevices() { } Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { - return static_cast<Settings::RendererBackend>(ui->api->currentIndex()); + if (Settings::configuring_global) { + return static_cast<Settings::RendererBackend>(ui->api->currentIndex()); + } + + if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.renderer_backend.SetGlobal(true); + return Settings::values.renderer_backend.GetValue(); + } + Settings::values.renderer_backend.SetGlobal(false); + return static_cast<Settings::RendererBackend>(ui->api->currentIndex() - + ConfigurationShared::USE_GLOBAL_OFFSET); +} + +void ConfigureGraphics::SetupPerGameUI() { + if (Settings::configuring_global) { + ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal()); + ui->device->setEnabled(Settings::values.renderer_backend.UsingGlobal()); + ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); + ui->use_asynchronous_gpu_emulation->setEnabled( + Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); + ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); + ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); + + return; + } + + connect(ui->bg_combobox, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this, + [this](int index) { ui->bg_button->setEnabled(index == 1); }); + + ui->use_disk_shader_cache->setTristate(true); + ui->use_asynchronous_gpu_emulation->setTristate(true); + ConfigurationShared::InsertGlobalItem(ui->aspect_ratio_combobox); + ConfigurationShared::InsertGlobalItem(ui->api); } diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 7e0596d9c..24f01c739 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -35,6 +35,8 @@ private: void RetrieveVulkanDevices(); + void SetupPerGameUI(); + Settings::RendererBackend GetCurrentGraphicsBackend() const; std::unique_ptr<Ui::ConfigureGraphics> ui; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index c816d6108..62418fc14 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -85,39 +85,34 @@ </widget> </item> <item> - <layout class="QHBoxLayout" name="horizontalLayout_2"> + <layout class="QHBoxLayout" name="horizontalLayout_6"> <item> - <widget class="QLabel" name="label"> + <widget class="QLabel" name="ar_label"> <property name="text"> - <string>Internal Resolution:</string> + <string>Aspect Ratio:</string> </property> </widget> </item> <item> - <widget class="QComboBox" name="resolution_factor_combobox"> - <item> - <property name="text"> - <string>Auto (Window Size)</string> - </property> - </item> + <widget class="QComboBox" name="aspect_ratio_combobox"> <item> <property name="text"> - <string>Native (1280x720)</string> + <string>Default (16:9)</string> </property> </item> <item> <property name="text"> - <string>2x Native (2560x1440)</string> + <string>Force 4:3</string> </property> </item> <item> <property name="text"> - <string>3x Native (3840x2160)</string> + <string>Force 21:9</string> </property> </item> <item> <property name="text"> - <string>4x Native (5120x2880)</string> + <string>Stretch to Window</string> </property> </item> </widget> @@ -125,42 +120,30 @@ </layout> </item> <item> - <layout class="QHBoxLayout" name="horizontalLayout_6"> + <layout class="QHBoxLayout" name="horizontalLayout_3"> <item> - <widget class="QLabel" name="ar_label"> - <property name="text"> - <string>Aspect Ratio:</string> + <widget class="QComboBox" name="bg_combobox"> + <property name="currentText"> + <string>Use global background color</string> + </property> + <property name="currentIndex"> + <number>0</number> + </property> + <property name="maxVisibleItems"> + <number>10</number> </property> - </widget> - </item> - <item> - <widget class="QComboBox" name="aspect_ratio_combobox"> - <item> - <property name="text"> - <string>Default (16:9)</string> - </property> - </item> - <item> - <property name="text"> - <string>Force 4:3</string> - </property> - </item> <item> <property name="text"> - <string>Force 21:9</string> + <string>Use global background color</string> </property> </item> <item> <property name="text"> - <string>Stretch to Window</string> + <string>Set background color:</string> </property> </item> </widget> </item> - </layout> - </item> - <item> - <layout class="QHBoxLayout" name="horizontalLayout_3"> <item> <widget class="QLabel" name="bg_label"> <property name="text"> @@ -169,6 +152,19 @@ </widget> </item> <item> + <spacer name="horizontalSpacer"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>40</width> + <height>20</height> + </size> + </property> + </spacer> + </item> + <item> <widget class="QPushButton" name="bg_button"> <property name="maximumSize"> <size> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 37aadf7f8..7c0fa7ec5 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/settings.h" #include "ui_configure_graphics_advanced.h" +#include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_graphics_advanced.h" ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) @@ -12,8 +13,7 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) ui->setupUi(this); - // TODO: Remove this after assembly shaders are fully integrated - ui->use_assembly_shaders->setVisible(false); + SetupPerGameUI(); SetConfiguration(); } @@ -22,26 +22,81 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); ui->use_vsync->setEnabled(runtime_lock); - ui->use_vsync->setChecked(Settings::values.use_vsync); ui->use_assembly_shaders->setEnabled(runtime_lock); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders); - ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); ui->force_30fps_mode->setEnabled(runtime_lock); - ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); - ui->anisotropic_filtering_combobox->setCurrentIndex(Settings::values.max_anisotropy); + + if (Settings::configuring_global) { + ui->gpu_accuracy->setCurrentIndex( + static_cast<int>(Settings::values.gpu_accuracy.GetValue())); + ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); + ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); + ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); + ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode.GetValue()); + ui->anisotropic_filtering_combobox->setCurrentIndex( + Settings::values.max_anisotropy.GetValue()); + } else { + ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); + ConfigurationShared::SetPerGameSetting(ui->use_vsync, &Settings::values.use_vsync); + ConfigurationShared::SetPerGameSetting(ui->use_assembly_shaders, + &Settings::values.use_assembly_shaders); + ConfigurationShared::SetPerGameSetting(ui->use_fast_gpu_time, + &Settings::values.use_fast_gpu_time); + ConfigurationShared::SetPerGameSetting(ui->force_30fps_mode, + &Settings::values.force_30fps_mode); + ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, + &Settings::values.max_anisotropy); + } } void ConfigureGraphicsAdvanced::ApplyConfiguration() { - auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); - Settings::values.gpu_accuracy = gpu_accuracy; - Settings::values.use_vsync = ui->use_vsync->isChecked(); - Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked(); - Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); - Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); - Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); + // Subtract 2 if configuring per-game (separator and "use global configuration" take 2 slots) + const auto gpu_accuracy = static_cast<Settings::GPUAccuracy>( + ui->gpu_accuracy->currentIndex() - + ((Settings::configuring_global) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + + if (Settings::configuring_global) { + // Must guard in case of a during-game configuration when set to be game-specific. + if (Settings::values.gpu_accuracy.UsingGlobal()) { + Settings::values.gpu_accuracy.SetValue(gpu_accuracy); + } + if (Settings::values.use_vsync.UsingGlobal()) { + Settings::values.use_vsync.SetValue(ui->use_vsync->isChecked()); + } + if (Settings::values.use_assembly_shaders.UsingGlobal()) { + Settings::values.use_assembly_shaders.SetValue(ui->use_assembly_shaders->isChecked()); + } + if (Settings::values.use_fast_gpu_time.UsingGlobal()) { + Settings::values.use_fast_gpu_time.SetValue(ui->use_fast_gpu_time->isChecked()); + } + if (Settings::values.force_30fps_mode.UsingGlobal()) { + Settings::values.force_30fps_mode.SetValue(ui->force_30fps_mode->isChecked()); + } + if (Settings::values.max_anisotropy.UsingGlobal()) { + Settings::values.max_anisotropy.SetValue( + ui->anisotropic_filtering_combobox->currentIndex()); + } + } else { + ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, + ui->anisotropic_filtering_combobox); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, + ui->use_assembly_shaders); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, + ui->use_fast_gpu_time); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.force_30fps_mode, + ui->force_30fps_mode); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, + ui->anisotropic_filtering_combobox); + + if (ui->gpu_accuracy->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.gpu_accuracy.SetGlobal(true); + } else { + Settings::values.gpu_accuracy.SetGlobal(false); + Settings::values.gpu_accuracy.SetValue(gpu_accuracy); + } + } } void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { @@ -55,3 +110,25 @@ void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { void ConfigureGraphicsAdvanced::RetranslateUI() { ui->retranslateUi(this); } + +void ConfigureGraphicsAdvanced::SetupPerGameUI() { + // Disable if not global (only happens during game) + if (Settings::configuring_global) { + ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); + ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); + ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); + ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); + ui->force_30fps_mode->setEnabled(Settings::values.force_30fps_mode.UsingGlobal()); + ui->anisotropic_filtering_combobox->setEnabled( + Settings::values.max_anisotropy.UsingGlobal()); + + return; + } + + ConfigurationShared::InsertGlobalItem(ui->gpu_accuracy); + ui->use_vsync->setTristate(true); + ui->use_assembly_shaders->setTristate(true); + ui->use_fast_gpu_time->setTristate(true); + ui->force_30fps_mode->setTristate(true); + ConfigurationShared::InsertGlobalItem(ui->anisotropic_filtering_combobox); +} diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index bbc9d4355..c043588ff 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -26,5 +26,7 @@ private: void SetConfiguration(); + void SetupPerGameUI(); + std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; }; diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index e4eb5594b..00433926d 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -70,6 +70,20 @@ static QString ButtonToText(const Common::ParamPackage& param) { return GetKeyName(param.Get("code", 0)); } + if (param.Get("engine", "") == "gcpad") { + if (param.Has("axis")) { + const QString axis_str = QString::fromStdString(param.Get("axis", "")); + const QString direction_str = QString::fromStdString(param.Get("direction", "")); + + return QObject::tr("GC Axis %1%2").arg(axis_str, direction_str); + } + if (param.Has("button")) { + const QString button_str = QString::number(int(std::log2(param.Get("button", 0)))); + return QObject::tr("GC Button %1").arg(button_str); + } + return GetKeyName(param.Get("code", 0)); + } + if (param.Get("engine", "") == "sdl") { if (param.Has("hat")) { const QString hat_str = QString::fromStdString(param.Get("hat", "")); @@ -126,6 +140,25 @@ static QString AnalogToText(const Common::ParamPackage& param, const std::string return {}; } + if (param.Get("engine", "") == "gcpad") { + if (dir == "modifier") { + return QObject::tr("[unused]"); + } + + if (dir == "left" || dir == "right") { + const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); + + return QObject::tr("GC Axis %1").arg(axis_x_str); + } + + if (dir == "up" || dir == "down") { + const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); + + return QObject::tr("GC Axis %1").arg(axis_y_str); + } + + return {}; + } return QObject::tr("[unknown]"); } @@ -332,7 +365,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i connect(analog_map_deadzone_and_modifier_slider[analog_id], &QSlider::valueChanged, [=] { const float slider_value = analog_map_deadzone_and_modifier_slider[analog_id]->value(); - if (analogs_param[analog_id].Get("engine", "") == "sdl") { + if (analogs_param[analog_id].Get("engine", "") == "sdl" || + analogs_param[analog_id].Get("engine", "") == "gcpad") { analog_map_deadzone_and_modifier_slider_label[analog_id]->setText( tr("Deadzone: %1%").arg(slider_value)); analogs_param[analog_id].Set("deadzone", slider_value / 100.0f); @@ -352,6 +386,20 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i connect(poll_timer.get(), &QTimer::timeout, [this] { Common::ParamPackage params; + if (InputCommon::GetGCButtons()->IsPolling()) { + params = InputCommon::GetGCButtons()->GetNextInput(); + if (params.Has("engine")) { + SetPollingResult(params, false); + return; + } + } + if (InputCommon::GetGCAnalogs()->IsPolling()) { + params = InputCommon::GetGCAnalogs()->GetNextInput(); + if (params.Has("engine")) { + SetPollingResult(params, false); + return; + } + } for (auto& poller : device_pollers) { params = poller->GetNextInput(); if (params.Has("engine")) { @@ -480,7 +528,9 @@ void ConfigureInputPlayer::RestoreDefaults() { SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]); } } + UpdateButtonLabels(); + ApplyConfiguration(); } void ConfigureInputPlayer::ClearAll() { @@ -505,6 +555,7 @@ void ConfigureInputPlayer::ClearAll() { } UpdateButtonLabels(); + ApplyConfiguration(); } void ConfigureInputPlayer::UpdateButtonLabels() { @@ -531,7 +582,7 @@ void ConfigureInputPlayer::UpdateButtonLabels() { analog_map_deadzone_and_modifier_slider_label[analog_id]; if (param.Has("engine")) { - if (param.Get("engine", "") == "sdl") { + if (param.Get("engine", "") == "sdl" || param.Get("engine", "") == "gcpad") { if (!param.Has("deadzone")) { param.Set("deadzone", 0.1f); } @@ -580,6 +631,11 @@ void ConfigureInputPlayer::HandleClick( grabKeyboard(); grabMouse(); + if (type == InputCommon::Polling::DeviceType::Button) { + InputCommon::GetGCButtons()->BeginConfiguration(); + } else { + InputCommon::GetGCAnalogs()->BeginConfiguration(); + } timeout_timer->start(5000); // Cancel after 5 seconds poll_timer->start(200); // Check for new inputs every 200ms } @@ -593,6 +649,9 @@ void ConfigureInputPlayer::SetPollingResult(const Common::ParamPackage& params, poller->Stop(); } + InputCommon::GetGCButtons()->EndConfiguration(); + InputCommon::GetGCAnalogs()->EndConfiguration(); + if (!abort) { (*input_setter)(params); } diff --git a/src/yuzu/configuration/configure_per_game.cpp b/src/yuzu/configuration/configure_per_game.cpp new file mode 100644 index 000000000..1e49f0787 --- /dev/null +++ b/src/yuzu/configuration/configure_per_game.cpp @@ -0,0 +1,140 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <utility> + +#include <QCheckBox> +#include <QHeaderView> +#include <QMenu> +#include <QStandardItemModel> +#include <QString> +#include <QTimer> +#include <QTreeView> + +#include "common/common_paths.h" +#include "common/file_util.h" +#include "core/file_sys/control_metadata.h" +#include "core/file_sys/patch_manager.h" +#include "core/file_sys/xts_archive.h" +#include "core/loader/loader.h" +#include "ui_configure_per_game.h" +#include "yuzu/configuration/config.h" +#include "yuzu/configuration/configure_input.h" +#include "yuzu/configuration/configure_per_game.h" +#include "yuzu/uisettings.h" +#include "yuzu/util/util.h" + +ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id) + : QDialog(parent), ui(std::make_unique<Ui::ConfigurePerGame>()), title_id(title_id) { + game_config = std::make_unique<Config>(fmt::format("{:016X}.ini", title_id), false); + + Settings::configuring_global = false; + + ui->setupUi(this); + setFocusPolicy(Qt::ClickFocus); + setWindowTitle(tr("Properties")); + + ui->addonsTab->SetTitleId(title_id); + + scene = new QGraphicsScene; + ui->icon_view->setScene(scene); + + LoadConfiguration(); +} + +ConfigurePerGame::~ConfigurePerGame() = default; + +void ConfigurePerGame::ApplyConfiguration() { + ui->addonsTab->ApplyConfiguration(); + ui->generalTab->ApplyConfiguration(); + ui->systemTab->ApplyConfiguration(); + ui->graphicsTab->ApplyConfiguration(); + ui->graphicsAdvancedTab->ApplyConfiguration(); + ui->audioTab->ApplyConfiguration(); + + Settings::Apply(); + Settings::LogSettings(); + + game_config->Save(); +} + +void ConfigurePerGame::changeEvent(QEvent* event) { + if (event->type() == QEvent::LanguageChange) { + RetranslateUI(); + } + + QDialog::changeEvent(event); +} + +void ConfigurePerGame::RetranslateUI() { + ui->retranslateUi(this); +} + +void ConfigurePerGame::LoadFromFile(FileSys::VirtualFile file) { + this->file = std::move(file); + LoadConfiguration(); +} + +void ConfigurePerGame::LoadConfiguration() { + if (file == nullptr) { + return; + } + + ui->addonsTab->LoadFromFile(file); + + ui->display_title_id->setText( + QStringLiteral("%1").arg(title_id, 16, 16, QLatin1Char{'0'}).toUpper()); + + FileSys::PatchManager pm{title_id}; + const auto control = pm.GetControlMetadata(); + const auto loader = Loader::GetLoader(file); + + if (control.first != nullptr) { + ui->display_version->setText(QString::fromStdString(control.first->GetVersionString())); + ui->display_name->setText(QString::fromStdString(control.first->GetApplicationName())); + ui->display_developer->setText(QString::fromStdString(control.first->GetDeveloperName())); + } else { + std::string title; + if (loader->ReadTitle(title) == Loader::ResultStatus::Success) + ui->display_name->setText(QString::fromStdString(title)); + + FileSys::NACP nacp; + if (loader->ReadControlData(nacp) == Loader::ResultStatus::Success) + ui->display_developer->setText(QString::fromStdString(nacp.GetDeveloperName())); + + ui->display_version->setText(QStringLiteral("1.0.0")); + } + + if (control.second != nullptr) { + scene->clear(); + + QPixmap map; + const auto bytes = control.second->ReadAllBytes(); + map.loadFromData(bytes.data(), static_cast<u32>(bytes.size())); + + scene->addPixmap(map.scaled(ui->icon_view->width(), ui->icon_view->height(), + Qt::IgnoreAspectRatio, Qt::SmoothTransformation)); + } else { + std::vector<u8> bytes; + if (loader->ReadIcon(bytes) == Loader::ResultStatus::Success) { + scene->clear(); + + QPixmap map; + map.loadFromData(bytes.data(), static_cast<u32>(bytes.size())); + + scene->addPixmap(map.scaled(ui->icon_view->width(), ui->icon_view->height(), + Qt::IgnoreAspectRatio, Qt::SmoothTransformation)); + } + } + + ui->display_filename->setText(QString::fromStdString(file->GetName())); + + ui->display_format->setText( + QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))); + + const auto valueText = ReadableByteSize(file->GetSize()); + ui->display_size->setText(valueText); +} diff --git a/src/yuzu/configuration/configure_per_game.h b/src/yuzu/configuration/configure_per_game.h new file mode 100644 index 000000000..5f9a08cef --- /dev/null +++ b/src/yuzu/configuration/configure_per_game.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> + +#include <QDialog> +#include <QList> + +#include "core/file_sys/vfs_types.h" +#include "yuzu/configuration/config.h" + +class QGraphicsScene; +class QStandardItem; +class QStandardItemModel; +class QTreeView; +class QVBoxLayout; + +namespace Ui { +class ConfigurePerGame; +} + +class ConfigurePerGame : public QDialog { + Q_OBJECT + +public: + explicit ConfigurePerGame(QWidget* parent, u64 title_id); + ~ConfigurePerGame() override; + + /// Save all button configurations to settings file + void ApplyConfiguration(); + + void LoadFromFile(FileSys::VirtualFile file); + +private: + void changeEvent(QEvent* event) override; + void RetranslateUI(); + + void LoadConfiguration(); + + std::unique_ptr<Ui::ConfigurePerGame> ui; + FileSys::VirtualFile file; + u64 title_id; + + QGraphicsScene* scene; + + std::unique_ptr<Config> game_config; +}; diff --git a/src/yuzu/configuration/configure_per_game.ui b/src/yuzu/configuration/configure_per_game.ui new file mode 100644 index 000000000..d2057c4ab --- /dev/null +++ b/src/yuzu/configuration/configure_per_game.ui @@ -0,0 +1,350 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>ConfigurePerGame</class> + <widget class="QDialog" name="ConfigurePerGame"> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>800</width> + <height>600</height> + </rect> + </property> + <property name="windowTitle"> + <string>Dialog</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_3"> + <item> + <layout class="QHBoxLayout" name="horizontalLayout"> + <item> + <widget class="QGroupBox" name="groupBox"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="title"> + <string>Info</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout"> + <item alignment="Qt::AlignHCenter"> + <widget class="QGraphicsView" name="icon_view"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Maximum"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="minimumSize"> + <size> + <width>256</width> + <height>256</height> + </size> + </property> + <property name="maximumSize"> + <size> + <width>256</width> + <height>256</height> + </size> + </property> + <property name="verticalScrollBarPolicy"> + <enum>Qt::ScrollBarAlwaysOff</enum> + </property> + <property name="horizontalScrollBarPolicy"> + <enum>Qt::ScrollBarAlwaysOff</enum> + </property> + <property name="interactive"> + <bool>false</bool> + </property> + </widget> + </item> + <item> + <layout class="QGridLayout" name="gridLayout_2"> + <item row="6" column="1"> + <widget class="QLineEdit" name="display_size"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="3" column="1"> + <widget class="QLineEdit" name="display_version"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="1" column="0"> + <widget class="QLabel" name="label"> + <property name="text"> + <string>Name</string> + </property> + </widget> + </item> + <item row="4" column="0"> + <widget class="QLabel" name="label_4"> + <property name="text"> + <string>Title ID</string> + </property> + </widget> + </item> + <item row="4" column="1"> + <widget class="QLineEdit" name="display_title_id"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="7" column="1"> + <widget class="QLineEdit" name="display_filename"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="5" column="1"> + <widget class="QLineEdit" name="display_format"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="7" column="0"> + <widget class="QLabel" name="label_7"> + <property name="text"> + <string>Filename</string> + </property> + </widget> + </item> + <item row="1" column="1"> + <widget class="QLineEdit" name="display_name"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QLineEdit" name="display_developer"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="readOnly"> + <bool>true</bool> + </property> + </widget> + </item> + <item row="5" column="0"> + <widget class="QLabel" name="label_5"> + <property name="text"> + <string>Format</string> + </property> + </widget> + </item> + <item row="3" column="0"> + <widget class="QLabel" name="label_3"> + <property name="text"> + <string>Version</string> + </property> + </widget> + </item> + <item row="6" column="0"> + <widget class="QLabel" name="label_6"> + <property name="text"> + <string>Size</string> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QLabel" name="label_2"> + <property name="text"> + <string>Developer</string> + </property> + </widget> + </item> + </layout> + </item> + <item> + <spacer name="verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>20</width> + <height>40</height> + </size> + </property> + </spacer> + </item> + </layout> + </widget> + </item> + <item> + <layout class="QVBoxLayout" name="VerticalLayout"> + <item> + <layout class="QVBoxLayout" name="verticalLayout_2"/> + </item> + <item> + <widget class="QTabWidget" name="tabWidget"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="currentIndex"> + <number>0</number> + </property> + <property name="usesScrollButtons"> + <bool>true</bool> + </property> + <property name="documentMode"> + <bool>false</bool> + </property> + <property name="tabsClosable"> + <bool>false</bool> + </property> + <widget class="ConfigurePerGameAddons" name="addonsTab"> + <attribute name="title"> + <string>Add-Ons</string> + </attribute> + </widget> + <widget class="ConfigureGeneral" name="generalTab"> + <attribute name="title"> + <string>General</string> + </attribute> + </widget> + <widget class="ConfigureSystem" name="systemTab"> + <attribute name="title"> + <string>System</string> + </attribute> + </widget> + <widget class="ConfigureGraphics" name="graphicsTab"> + <attribute name="title"> + <string>Graphics</string> + </attribute> + </widget> + <widget class="ConfigureGraphicsAdvanced" name="graphicsAdvancedTab"> + <attribute name="title"> + <string>Adv. Graphics</string> + </attribute> + </widget> + <widget class="ConfigureAudio" name="audioTab"> + <attribute name="title"> + <string>Audio</string> + </attribute> + </widget> + </widget> + </item> + </layout> + </item> + </layout> + </item> + <item> + <widget class="QDialogButtonBox" name="buttonBox"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + <property name="standardButtons"> + <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> + </property> + </widget> + </item> + </layout> + </widget> + <customwidgets> + <customwidget> + <class>ConfigureGeneral</class> + <extends>QWidget</extends> + <header>configuration/configure_general.h</header> + <container>1</container> + </customwidget> + <customwidget> + <class>ConfigureSystem</class> + <extends>QWidget</extends> + <header>configuration/configure_system.h</header> + <container>1</container> + </customwidget> + <customwidget> + <class>ConfigureAudio</class> + <extends>QWidget</extends> + <header>configuration/configure_audio.h</header> + <container>1</container> + </customwidget> + <customwidget> + <class>ConfigureGraphics</class> + <extends>QWidget</extends> + <header>configuration/configure_graphics.h</header> + <container>1</container> + </customwidget> + <customwidget> + <class>ConfigureGraphicsAdvanced</class> + <extends>QWidget</extends> + <header>configuration/configure_graphics_advanced.h</header> + <container>1</container> + </customwidget> + <customwidget> + <class>ConfigurePerGameAddons</class> + <extends>QWidget</extends> + <header>configuration/configure_per_game_addons.h</header> + <container>1</container> + </customwidget> + </customwidgets> + <resources/> + <connections> + <connection> + <sender>buttonBox</sender> + <signal>accepted()</signal> + <receiver>ConfigurePerGame</receiver> + <slot>accept()</slot> + <hints> + <hint type="sourcelabel"> + <x>248</x> + <y>254</y> + </hint> + <hint type="destinationlabel"> + <x>157</x> + <y>274</y> + </hint> + </hints> + </connection> + <connection> + <sender>buttonBox</sender> + <signal>rejected()</signal> + <receiver>ConfigurePerGame</receiver> + <slot>reject()</slot> + <hints> + <hint type="sourcelabel"> + <x>316</x> + <y>260</y> + </hint> + <hint type="destinationlabel"> + <x>286</x> + <y>274</y> + </hint> + </hints> + </connection> + </connections> +</ui> diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_game_addons.cpp index d7f259f12..478d5d3a1 100644 --- a/src/yuzu/configuration/configure_per_general.cpp +++ b/src/yuzu/configuration/configure_per_game_addons.cpp @@ -15,23 +15,20 @@ #include "common/common_paths.h" #include "common/file_util.h" -#include "core/file_sys/control_metadata.h" +#include "core/core.h" #include "core/file_sys/patch_manager.h" #include "core/file_sys/xts_archive.h" #include "core/loader/loader.h" -#include "ui_configure_per_general.h" +#include "ui_configure_per_game_addons.h" #include "yuzu/configuration/config.h" #include "yuzu/configuration/configure_input.h" -#include "yuzu/configuration/configure_per_general.h" +#include "yuzu/configuration/configure_per_game_addons.h" #include "yuzu/uisettings.h" #include "yuzu/util/util.h" -ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) - : QDialog(parent), ui(std::make_unique<Ui::ConfigurePerGameGeneral>()), title_id(title_id) { - +ConfigurePerGameAddons::ConfigurePerGameAddons(QWidget* parent) + : QWidget(parent), ui(new Ui::ConfigurePerGameAddons) { ui->setupUi(this); - setFocusPolicy(Qt::ClickFocus); - setWindowTitle(tr("Properties")); layout = new QVBoxLayout; tree_view = new QTreeView; @@ -52,7 +49,7 @@ ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) item_model->setHeaderData(1, Qt::Horizontal, tr("Version")); // We must register all custom types with the Qt Automoc system so that we are able to use it - // with signals/slots. In this case, QList falls under the umbrells of custom types. + // with signals/slots. In this case, QList falls under the umbrella of custom types. qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>"); layout->setContentsMargins(0, 0, 0, 0); @@ -61,18 +58,15 @@ ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) ui->scrollArea->setLayout(layout); - scene = new QGraphicsScene; - ui->icon_view->setScene(scene); + ui->scrollArea->setEnabled(!Core::System::GetInstance().IsPoweredOn()); connect(item_model, &QStandardItemModel::itemChanged, [] { UISettings::values.is_game_list_reload_pending.exchange(true); }); - - LoadConfiguration(); } -ConfigurePerGameGeneral::~ConfigurePerGameGeneral() = default; +ConfigurePerGameAddons::~ConfigurePerGameAddons() = default; -void ConfigurePerGameGeneral::ApplyConfiguration() { +void ConfigurePerGameAddons::ApplyConfiguration() { std::vector<std::string> disabled_addons; for (const auto& item : list_items) { @@ -92,72 +86,35 @@ void ConfigurePerGameGeneral::ApplyConfiguration() { Settings::values.disabled_addons[title_id] = disabled_addons; } -void ConfigurePerGameGeneral::changeEvent(QEvent* event) { +void ConfigurePerGameAddons::LoadFromFile(FileSys::VirtualFile file) { + this->file = std::move(file); + LoadConfiguration(); +} + +void ConfigurePerGameAddons::SetTitleId(u64 id) { + this->title_id = id; +} + +void ConfigurePerGameAddons::changeEvent(QEvent* event) { if (event->type() == QEvent::LanguageChange) { RetranslateUI(); } - QDialog::changeEvent(event); + QWidget::changeEvent(event); } -void ConfigurePerGameGeneral::RetranslateUI() { +void ConfigurePerGameAddons::RetranslateUI() { ui->retranslateUi(this); } -void ConfigurePerGameGeneral::LoadFromFile(FileSys::VirtualFile file) { - this->file = std::move(file); - LoadConfiguration(); -} - -void ConfigurePerGameGeneral::LoadConfiguration() { +void ConfigurePerGameAddons::LoadConfiguration() { if (file == nullptr) { return; } - ui->display_title_id->setText(QString::fromStdString(fmt::format("{:016X}", title_id))); - FileSys::PatchManager pm{title_id}; - const auto control = pm.GetControlMetadata(); const auto loader = Loader::GetLoader(file); - if (control.first != nullptr) { - ui->display_version->setText(QString::fromStdString(control.first->GetVersionString())); - ui->display_name->setText(QString::fromStdString(control.first->GetApplicationName())); - ui->display_developer->setText(QString::fromStdString(control.first->GetDeveloperName())); - } else { - std::string title; - if (loader->ReadTitle(title) == Loader::ResultStatus::Success) - ui->display_name->setText(QString::fromStdString(title)); - - FileSys::NACP nacp; - if (loader->ReadControlData(nacp) == Loader::ResultStatus::Success) - ui->display_developer->setText(QString::fromStdString(nacp.GetDeveloperName())); - - ui->display_version->setText(QStringLiteral("1.0.0")); - } - - if (control.second != nullptr) { - scene->clear(); - - QPixmap map; - const auto bytes = control.second->ReadAllBytes(); - map.loadFromData(bytes.data(), static_cast<u32>(bytes.size())); - - scene->addPixmap(map.scaled(ui->icon_view->width(), ui->icon_view->height(), - Qt::IgnoreAspectRatio, Qt::SmoothTransformation)); - } else { - std::vector<u8> bytes; - if (loader->ReadIcon(bytes) == Loader::ResultStatus::Success) { - scene->clear(); - - QPixmap map; - map.loadFromData(bytes.data(), static_cast<u32>(bytes.size())); - - scene->addPixmap(map.scaled(ui->icon_view->width(), ui->icon_view->height(), - Qt::IgnoreAspectRatio, Qt::SmoothTransformation)); - } - } - FileSys::VirtualFile update_raw; loader->ReadUpdateRaw(update_raw); @@ -182,12 +139,4 @@ void ConfigurePerGameGeneral::LoadConfiguration() { } tree_view->setColumnWidth(0, 5 * tree_view->width() / 16); - - ui->display_filename->setText(QString::fromStdString(file->GetName())); - - ui->display_format->setText( - QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))); - - const auto valueText = ReadableByteSize(file->GetSize()); - ui->display_size->setText(valueText); } diff --git a/src/yuzu/configuration/configure_per_general.h b/src/yuzu/configuration/configure_per_game_addons.h index a3b2cdeff..a00ec3539 100644 --- a/src/yuzu/configuration/configure_per_general.h +++ b/src/yuzu/configuration/configure_per_game_addons.h @@ -1,4 +1,4 @@ -// Copyright 2016 Citra Emulator Project +// Copyright 2016 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -7,7 +7,6 @@ #include <memory> #include <vector> -#include <QDialog> #include <QList> #include "core/file_sys/vfs_types.h" @@ -19,35 +18,36 @@ class QTreeView; class QVBoxLayout; namespace Ui { -class ConfigurePerGameGeneral; +class ConfigurePerGameAddons; } -class ConfigurePerGameGeneral : public QDialog { +class ConfigurePerGameAddons : public QWidget { Q_OBJECT public: - explicit ConfigurePerGameGeneral(QWidget* parent, u64 title_id); - ~ConfigurePerGameGeneral() override; + explicit ConfigurePerGameAddons(QWidget* parent = nullptr); + ~ConfigurePerGameAddons() override; /// Save all button configurations to settings file void ApplyConfiguration(); void LoadFromFile(FileSys::VirtualFile file); + void SetTitleId(u64 id); + private: void changeEvent(QEvent* event) override; void RetranslateUI(); void LoadConfiguration(); - std::unique_ptr<Ui::ConfigurePerGameGeneral> ui; + std::unique_ptr<Ui::ConfigurePerGameAddons> ui; FileSys::VirtualFile file; u64 title_id; QVBoxLayout* layout; QTreeView* tree_view; QStandardItemModel* item_model; - QGraphicsScene* scene; std::vector<QList<QStandardItem*>> list_items; }; diff --git a/src/yuzu/configuration/configure_per_game_addons.ui b/src/yuzu/configuration/configure_per_game_addons.ui new file mode 100644 index 000000000..aefdebfcd --- /dev/null +++ b/src/yuzu/configuration/configure_per_game_addons.ui @@ -0,0 +1,38 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>ConfigurePerGameAddons</class> + <widget class="QWidget" name="ConfigurePerGameAddons"> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>400</width> + <height>300</height> + </rect> + </property> + <property name="windowTitle"> + <string>Form</string> + </property> + <layout class="QGridLayout" name="gridLayout"> + <item row="0" column="0"> + <widget class="QScrollArea" name="scrollArea"> + <property name="widgetResizable"> + <bool>true</bool> + </property> + <widget class="QWidget" name="scrollAreaWidgetContents"> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>380</width> + <height>280</height> + </rect> + </property> + </widget> + </widget> + </item> + </layout> + </widget> + <resources/> + <connections/> +</ui> diff --git a/src/yuzu/configuration/configure_per_general.ui b/src/yuzu/configuration/configure_per_general.ui deleted file mode 100644 index 8fdd96fa4..000000000 --- a/src/yuzu/configuration/configure_per_general.ui +++ /dev/null @@ -1,276 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<ui version="4.0"> - <class>ConfigurePerGameGeneral</class> - <widget class="QDialog" name="ConfigurePerGameGeneral"> - <property name="geometry"> - <rect> - <x>0</x> - <y>0</y> - <width>400</width> - <height>520</height> - </rect> - </property> - <property name="windowTitle"> - <string>ConfigurePerGameGeneral</string> - </property> - <layout class="QHBoxLayout" name="HorizontalLayout"> - <item> - <layout class="QVBoxLayout" name="VerticalLayout"> - <item> - <widget class="QGroupBox" name="GeneralGroupBox"> - <property name="title"> - <string>Info</string> - </property> - <layout class="QHBoxLayout" name="GeneralHorizontalLayout"> - <item> - <layout class="QGridLayout" name="gridLayout_2"> - <item row="6" column="1" colspan="2"> - <widget class="QLineEdit" name="display_filename"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="0" column="1"> - <widget class="QLineEdit" name="display_name"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="1" column="0"> - <widget class="QLabel" name="label_2"> - <property name="text"> - <string>Developer</string> - </property> - </widget> - </item> - <item row="5" column="1" colspan="2"> - <widget class="QLineEdit" name="display_size"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="0" column="0"> - <widget class="QLabel" name="label"> - <property name="text"> - <string>Name</string> - </property> - </widget> - </item> - <item row="6" column="0"> - <widget class="QLabel" name="label_7"> - <property name="text"> - <string>Filename</string> - </property> - </widget> - </item> - <item row="2" column="0"> - <widget class="QLabel" name="label_3"> - <property name="text"> - <string>Version</string> - </property> - </widget> - </item> - <item row="4" column="0"> - <widget class="QLabel" name="label_5"> - <property name="text"> - <string>Format</string> - </property> - </widget> - </item> - <item row="2" column="1"> - <widget class="QLineEdit" name="display_version"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="4" column="1"> - <widget class="QLineEdit" name="display_format"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="5" column="0"> - <widget class="QLabel" name="label_6"> - <property name="text"> - <string>Size</string> - </property> - </widget> - </item> - <item row="1" column="1"> - <widget class="QLineEdit" name="display_developer"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="3" column="0"> - <widget class="QLabel" name="label_4"> - <property name="text"> - <string>Title ID</string> - </property> - </widget> - </item> - <item row="3" column="1"> - <widget class="QLineEdit" name="display_title_id"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="readOnly"> - <bool>true</bool> - </property> - </widget> - </item> - <item row="0" column="2" rowspan="5"> - <widget class="QGraphicsView" name="icon_view"> - <property name="sizePolicy"> - <sizepolicy hsizetype="Maximum" vsizetype="Maximum"> - <horstretch>0</horstretch> - <verstretch>0</verstretch> - </sizepolicy> - </property> - <property name="minimumSize"> - <size> - <width>128</width> - <height>128</height> - </size> - </property> - <property name="maximumSize"> - <size> - <width>128</width> - <height>128</height> - </size> - </property> - <property name="verticalScrollBarPolicy"> - <enum>Qt::ScrollBarAlwaysOff</enum> - </property> - <property name="horizontalScrollBarPolicy"> - <enum>Qt::ScrollBarAlwaysOff</enum> - </property> - <property name="sizeAdjustPolicy"> - <enum>QAbstractScrollArea::AdjustToContents</enum> - </property> - <property name="interactive"> - <bool>false</bool> - </property> - </widget> - </item> - </layout> - </item> - </layout> - </widget> - </item> - <item> - <widget class="QGroupBox" name="PerformanceGroupBox"> - <property name="title"> - <string>Add-Ons</string> - </property> - <layout class="QHBoxLayout" name="PerformanceHorizontalLayout"> - <item> - <widget class="QScrollArea" name="scrollArea"> - <property name="widgetResizable"> - <bool>true</bool> - </property> - <widget class="QWidget" name="scrollAreaWidgetContents"> - <property name="geometry"> - <rect> - <x>0</x> - <y>0</y> - <width>350</width> - <height>169</height> - </rect> - </property> - </widget> - </widget> - </item> - <item> - <layout class="QVBoxLayout" name="PerformanceVerticalLayout"/> - </item> - </layout> - </widget> - </item> - <item> - <spacer name="verticalSpacer"> - <property name="orientation"> - <enum>Qt::Vertical</enum> - </property> - <property name="sizeType"> - <enum>QSizePolicy::Fixed</enum> - </property> - <property name="sizeHint" stdset="0"> - <size> - <width>20</width> - <height>40</height> - </size> - </property> - </spacer> - </item> - <item> - <widget class="QDialogButtonBox" name="buttonBox"> - <property name="standardButtons"> - <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> - </property> - </widget> - </item> - </layout> - </item> - </layout> - </widget> - <resources/> - <connections> - <connection> - <sender>buttonBox</sender> - <signal>accepted()</signal> - <receiver>ConfigurePerGameGeneral</receiver> - <slot>accept()</slot> - <hints> - <hint type="sourcelabel"> - <x>269</x> - <y>567</y> - </hint> - <hint type="destinationlabel"> - <x>269</x> - <y>294</y> - </hint> - </hints> - </connection> - <connection> - <sender>buttonBox</sender> - <signal>rejected()</signal> - <receiver>ConfigurePerGameGeneral</receiver> - <slot>reject()</slot> - <hints> - <hint type="sourcelabel"> - <x>269</x> - <y>567</y> - </hint> - <hint type="destinationlabel"> - <x>269</x> - <y>294</y> - </hint> - </hints> - </connection> - </connections> -</ui> diff --git a/src/yuzu/configuration/configure_service.cpp b/src/yuzu/configuration/configure_service.cpp index 06566e981..0de7a4f0b 100644 --- a/src/yuzu/configuration/configure_service.cpp +++ b/src/yuzu/configuration/configure_service.cpp @@ -68,6 +68,7 @@ void ConfigureService::SetConfiguration() { } std::pair<QString, QString> ConfigureService::BCATDownloadEvents() { +#ifdef YUZU_ENABLE_BOXCAT std::optional<std::string> global; std::map<std::string, Service::BCAT::EventStatus> map; const auto res = Service::BCAT::Boxcat::GetStatus(global, map); @@ -105,7 +106,10 @@ std::pair<QString, QString> ConfigureService::BCATDownloadEvents() { .arg(QString::fromStdString(key)) .arg(FormatEventStatusString(value)); } - return {QStringLiteral("Current Boxcat Events"), std::move(out)}; + return {tr("Current Boxcat Events"), std::move(out)}; +#else + return {tr("Current Boxcat Events"), tr("There are currently no events on boxcat.")}; +#endif } void ConfigureService::OnBCATImplChanged() { diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index 10315e7a6..68e02738b 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp @@ -14,6 +14,7 @@ #include "core/core.h" #include "core/settings.h" #include "ui_configure_system.h" +#include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_system.h" ConfigureSystem::ConfigureSystem(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureSystem) { @@ -21,20 +22,25 @@ ConfigureSystem::ConfigureSystem(QWidget* parent) : QWidget(parent), ui(new Ui:: connect(ui->button_regenerate_console_id, &QPushButton::clicked, this, &ConfigureSystem::RefreshConsoleID); - connect(ui->rng_seed_checkbox, &QCheckBox::stateChanged, this, [this](bool checked) { - ui->rng_seed_edit->setEnabled(checked); - if (!checked) { + connect(ui->rng_seed_checkbox, &QCheckBox::stateChanged, this, [this](int state) { + ui->rng_seed_edit->setEnabled(state == Qt::Checked); + if (state != Qt::Checked) { ui->rng_seed_edit->setText(QStringLiteral("00000000")); } }); - connect(ui->custom_rtc_checkbox, &QCheckBox::stateChanged, this, [this](bool checked) { - ui->custom_rtc_edit->setEnabled(checked); - if (!checked) { + connect(ui->custom_rtc_checkbox, &QCheckBox::stateChanged, this, [this](int state) { + ui->custom_rtc_edit->setEnabled(state == Qt::Checked); + if (state != Qt::Checked) { ui->custom_rtc_edit->setDateTime(QDateTime::currentDateTime()); } }); + ui->label_console_id->setVisible(Settings::configuring_global); + ui->button_regenerate_console_id->setVisible(Settings::configuring_global); + + SetupPerGameUI(); + SetConfiguration(); } @@ -54,26 +60,58 @@ void ConfigureSystem::RetranslateUI() { void ConfigureSystem::SetConfiguration() { enabled = !Core::System::GetInstance().IsPoweredOn(); + const auto rng_seed = + QStringLiteral("%1") + .arg(Settings::values.rng_seed.GetValue().value_or(0), 8, 16, QLatin1Char{'0'}) + .toUpper(); + const auto rtc_time = Settings::values.custom_rtc.GetValue().value_or( + std::chrono::seconds(QDateTime::currentSecsSinceEpoch())); - ui->combo_language->setCurrentIndex(Settings::values.language_index); - ui->combo_region->setCurrentIndex(Settings::values.region_index); - ui->combo_time_zone->setCurrentIndex(Settings::values.time_zone_index); - ui->combo_sound->setCurrentIndex(Settings::values.sound_index); - - ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); - ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); - - const auto rng_seed = QStringLiteral("%1") - .arg(Settings::values.rng_seed.value_or(0), 8, 16, QLatin1Char{'0'}) - .toUpper(); - ui->rng_seed_edit->setText(rng_seed); - - ui->custom_rtc_checkbox->setChecked(Settings::values.custom_rtc.has_value()); - ui->custom_rtc_edit->setEnabled(Settings::values.custom_rtc.has_value()); + if (Settings::configuring_global) { + ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); + ui->combo_region->setCurrentIndex(Settings::values.region_index.GetValue()); + ui->combo_time_zone->setCurrentIndex(Settings::values.time_zone_index.GetValue()); + ui->combo_sound->setCurrentIndex(Settings::values.sound_index.GetValue()); + + ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.GetValue().has_value()); + ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.GetValue().has_value() && + Settings::values.rng_seed.UsingGlobal()); + ui->rng_seed_edit->setText(rng_seed); + + ui->custom_rtc_checkbox->setChecked(Settings::values.custom_rtc.GetValue().has_value()); + ui->custom_rtc_edit->setEnabled(Settings::values.custom_rtc.GetValue().has_value() && + Settings::values.rng_seed.UsingGlobal()); + ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time.count())); + } else { + ConfigurationShared::SetPerGameSetting(ui->combo_language, + &Settings::values.language_index); + ConfigurationShared::SetPerGameSetting(ui->combo_region, &Settings::values.region_index); + ConfigurationShared::SetPerGameSetting(ui->combo_time_zone, + &Settings::values.time_zone_index); + ConfigurationShared::SetPerGameSetting(ui->combo_sound, &Settings::values.sound_index); + + if (Settings::values.rng_seed.UsingGlobal()) { + ui->rng_seed_checkbox->setCheckState(Qt::PartiallyChecked); + } else { + ui->rng_seed_checkbox->setCheckState( + Settings::values.rng_seed.GetValue().has_value() ? Qt::Checked : Qt::Unchecked); + ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.GetValue().has_value()); + if (Settings::values.rng_seed.GetValue().has_value()) { + ui->rng_seed_edit->setText(rng_seed); + } + } - const auto rtc_time = Settings::values.custom_rtc.value_or( - std::chrono::seconds(QDateTime::currentSecsSinceEpoch())); - ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time.count())); + if (Settings::values.custom_rtc.UsingGlobal()) { + ui->custom_rtc_checkbox->setCheckState(Qt::PartiallyChecked); + } else { + ui->custom_rtc_checkbox->setCheckState( + Settings::values.custom_rtc.GetValue().has_value() ? Qt::Checked : Qt::Unchecked); + ui->custom_rtc_edit->setEnabled(Settings::values.custom_rtc.GetValue().has_value()); + if (Settings::values.custom_rtc.GetValue().has_value()) { + ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time.count())); + } + } + } } void ConfigureSystem::ReadSystemSettings() {} @@ -83,22 +121,78 @@ void ConfigureSystem::ApplyConfiguration() { return; } - Settings::values.language_index = ui->combo_language->currentIndex(); - Settings::values.region_index = ui->combo_region->currentIndex(); - Settings::values.time_zone_index = ui->combo_time_zone->currentIndex(); - Settings::values.sound_index = ui->combo_sound->currentIndex(); + if (Settings::configuring_global) { + // Guard if during game and set to game-specific value + if (Settings::values.language_index.UsingGlobal()) { + Settings::values.language_index.SetValue(ui->combo_language->currentIndex()); + } + if (Settings::values.region_index.UsingGlobal()) { + Settings::values.region_index.SetValue(ui->combo_region->currentIndex()); + } + if (Settings::values.time_zone_index.UsingGlobal()) { + Settings::values.time_zone_index.SetValue(ui->combo_time_zone->currentIndex()); + } + if (Settings::values.sound_index.UsingGlobal()) { + Settings::values.sound_index.SetValue(ui->combo_sound->currentIndex()); + } + + if (Settings::values.rng_seed.UsingGlobal()) { + if (ui->rng_seed_checkbox->isChecked()) { + Settings::values.rng_seed.SetValue( + ui->rng_seed_edit->text().toULongLong(nullptr, 16)); + } else { + Settings::values.rng_seed.SetValue(std::nullopt); + } + } - if (ui->rng_seed_checkbox->isChecked()) { - Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); + if (Settings::values.custom_rtc.UsingGlobal()) { + if (ui->custom_rtc_checkbox->isChecked()) { + Settings::values.custom_rtc.SetValue( + std::chrono::seconds(ui->custom_rtc_edit->dateTime().toSecsSinceEpoch())); + } else { + Settings::values.custom_rtc.SetValue(std::nullopt); + } + } } else { - Settings::values.rng_seed = std::nullopt; - } + ConfigurationShared::ApplyPerGameSetting(&Settings::values.language_index, + ui->combo_language); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, + ui->combo_time_zone); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.sound_index, ui->combo_sound); + + switch (ui->rng_seed_checkbox->checkState()) { + case Qt::Checked: + Settings::values.rng_seed.SetGlobal(false); + Settings::values.rng_seed.SetValue(ui->rng_seed_edit->text().toULongLong(nullptr, 16)); + break; + case Qt::Unchecked: + Settings::values.rng_seed.SetGlobal(false); + Settings::values.rng_seed.SetValue(std::nullopt); + break; + case Qt::PartiallyChecked: + Settings::values.rng_seed.SetGlobal(false); + Settings::values.rng_seed.SetValue(std::nullopt); + Settings::values.rng_seed.SetGlobal(true); + break; + } - if (ui->custom_rtc_checkbox->isChecked()) { - Settings::values.custom_rtc = - std::chrono::seconds(ui->custom_rtc_edit->dateTime().toSecsSinceEpoch()); - } else { - Settings::values.custom_rtc = std::nullopt; + switch (ui->custom_rtc_checkbox->checkState()) { + case Qt::Checked: + Settings::values.custom_rtc.SetGlobal(false); + Settings::values.custom_rtc.SetValue( + std::chrono::seconds(ui->custom_rtc_edit->dateTime().toSecsSinceEpoch())); + break; + case Qt::Unchecked: + Settings::values.custom_rtc.SetGlobal(false); + Settings::values.custom_rtc.SetValue(std::nullopt); + break; + case Qt::PartiallyChecked: + Settings::values.custom_rtc.SetGlobal(false); + Settings::values.custom_rtc.SetValue(std::nullopt); + Settings::values.custom_rtc.SetGlobal(true); + break; + } } Settings::Apply(); @@ -120,3 +214,25 @@ void ConfigureSystem::RefreshConsoleID() { ui->label_console_id->setText( tr("Console ID: 0x%1").arg(QString::number(console_id, 16).toUpper())); } + +void ConfigureSystem::SetupPerGameUI() { + if (Settings::configuring_global) { + ui->combo_language->setEnabled(Settings::values.language_index.UsingGlobal()); + ui->combo_region->setEnabled(Settings::values.region_index.UsingGlobal()); + ui->combo_time_zone->setEnabled(Settings::values.time_zone_index.UsingGlobal()); + ui->combo_sound->setEnabled(Settings::values.sound_index.UsingGlobal()); + ui->rng_seed_checkbox->setEnabled(Settings::values.rng_seed.UsingGlobal()); + ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.UsingGlobal()); + ui->custom_rtc_checkbox->setEnabled(Settings::values.custom_rtc.UsingGlobal()); + ui->custom_rtc_edit->setEnabled(Settings::values.custom_rtc.UsingGlobal()); + + return; + } + + ConfigurationShared::InsertGlobalItem(ui->combo_language); + ConfigurationShared::InsertGlobalItem(ui->combo_region); + ConfigurationShared::InsertGlobalItem(ui->combo_time_zone); + ConfigurationShared::InsertGlobalItem(ui->combo_sound); + ui->rng_seed_checkbox->setTristate(true); + ui->custom_rtc_checkbox->setTristate(true); +} diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index 26d42d5c5..f317ef8b5 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h @@ -32,6 +32,8 @@ private: void RefreshConsoleID(); + void SetupPerGameUI(); + std::unique_ptr<Ui::ConfigureSystem> ui; bool enabled = false; diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index c1ea25fb8..9bb0a0109 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -2,10 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <fmt/format.h> + #include "yuzu/debugger/wait_tree.h" #include "yuzu/util/util.h" #include "common/assert.h" +#include "core/arm/arm_interface.h" #include "core/core.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/mutex.h" @@ -59,8 +62,10 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() std::size_t row = 0; auto add_threads = [&](const std::vector<std::shared_ptr<Kernel::Thread>>& threads) { for (std::size_t i = 0; i < threads.size(); ++i) { - item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i])); - item_list.back()->row = row; + if (!threads[i]->IsHLEThread()) { + item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i])); + item_list.back()->row = row; + } ++row; } }; @@ -114,20 +119,21 @@ QString WaitTreeCallstack::GetText() const { std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const { std::vector<std::unique_ptr<WaitTreeItem>> list; - constexpr std::size_t BaseRegister = 29; - auto& memory = Core::System::GetInstance().Memory(); - u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister]; + if (thread.IsHLEThread()) { + return list; + } - while (base_pointer != 0) { - const u64 lr = memory.Read64(base_pointer + sizeof(u64)); - if (lr == 0) { - break; - } + if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64BitProcess()) { + return list; + } - list.push_back(std::make_unique<WaitTreeText>( - tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'}))); + auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(), + thread.GetContext64()); - base_pointer = memory.Read64(base_pointer); + for (auto& entry : backtrace) { + std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address, + entry.original_address, entry.offset, entry.name); + list.push_back(std::make_unique<WaitTreeText>(QString::fromStdString(s))); } return list; @@ -206,7 +212,15 @@ QString WaitTreeThread::GetText() const { status = tr("running"); break; case Kernel::ThreadStatus::Ready: - status = tr("ready"); + if (!thread.IsPaused()) { + if (thread.WasRunning()) { + status = tr("running"); + } else { + status = tr("ready"); + } + } else { + status = tr("paused"); + } break; case Kernel::ThreadStatus::Paused: status = tr("paused"); @@ -254,7 +268,15 @@ QColor WaitTreeThread::GetColor() const { case Kernel::ThreadStatus::Running: return QColor(Qt::GlobalColor::darkGreen); case Kernel::ThreadStatus::Ready: - return QColor(Qt::GlobalColor::darkBlue); + if (!thread.IsPaused()) { + if (thread.WasRunning()) { + return QColor(Qt::GlobalColor::darkGreen); + } else { + return QColor(Qt::GlobalColor::darkBlue); + } + } else { + return QColor(Qt::GlobalColor::lightGray); + } case Kernel::ThreadStatus::Paused: return QColor(Qt::GlobalColor::lightGray); case Kernel::ThreadStatus::WaitHLEEvent: @@ -319,7 +341,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) { list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(), - thread.IsSleepingOnWait())); + thread.IsWaitingSync())); } list.push_back(std::make_unique<WaitTreeCallstack>(thread)); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 270cccc77..4d501a8f9 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -16,7 +16,7 @@ #include "applets/software_keyboard.h" #include "applets/web_browser.h" #include "configuration/configure_input.h" -#include "configuration/configure_per_general.h" +#include "configuration/configure_per_game.h" #include "core/file_sys/vfs.h" #include "core/file_sys/vfs_real.h" #include "core/frontend/applets/general_frontend.h" @@ -56,6 +56,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include <QShortcut> #include <QStatusBar> #include <QSysInfo> +#include <QUrl> #include <QtConcurrent/QtConcurrent> #include <fmt/format.h> @@ -217,7 +218,20 @@ GMainWindow::GMainWindow() LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch, Common::g_scm_desc); #ifdef ARCHITECTURE_x86_64 - LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); + const auto& caps = Common::GetCPUCaps(); + std::string cpu_string = caps.cpu_string; + if (caps.avx || caps.avx2 || caps.avx512) { + cpu_string += " | AVX"; + if (caps.avx512) { + cpu_string += "512"; + } else if (caps.avx2) { + cpu_string += '2'; + } + if (caps.fma || caps.fma4) { + cpu_string += " | FMA"; + } + } + LOG_INFO(Frontend, "Host CPU: {}", cpu_string); #endif LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); LOG_INFO(Frontend, "Host RAM: {:.2f} GB", @@ -520,14 +534,36 @@ void GMainWindow::InitializeWidgets() { if (emulation_running) { return; } - Settings::values.use_asynchronous_gpu_emulation = - !Settings::values.use_asynchronous_gpu_emulation; - async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + bool is_async = !Settings::values.use_asynchronous_gpu_emulation.GetValue() || + Settings::values.use_multi_core.GetValue(); + Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); Settings::Apply(); }); async_status_button->setText(tr("ASYNC")); async_status_button->setCheckable(true); - async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); + + // Setup Multicore button + multicore_status_button = new QPushButton(); + multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + multicore_status_button->setFocusPolicy(Qt::NoFocus); + connect(multicore_status_button, &QPushButton::clicked, [&] { + if (emulation_running) { + return; + } + Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); + bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue() || + Settings::values.use_multi_core.GetValue(); + Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); + multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); + Settings::Apply(); + }); + multicore_status_button->setText(tr("MULTICORE")); + multicore_status_button->setCheckable(true); + multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); + statusBar()->insertPermanentWidget(0, multicore_status_button); statusBar()->insertPermanentWidget(0, async_status_button); // Setup Renderer API button @@ -545,16 +581,16 @@ void GMainWindow::InitializeWidgets() { renderer_status_button->setCheckable(false); renderer_status_button->setDisabled(true); #else - renderer_status_button->setChecked(Settings::values.renderer_backend == + renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan); connect(renderer_status_button, &QPushButton::clicked, [=] { if (emulation_running) { return; } if (renderer_status_button->isChecked()) { - Settings::values.renderer_backend = Settings::RendererBackend::Vulkan; + Settings::values.renderer_backend.SetValue(Settings::RendererBackend::Vulkan); } else { - Settings::values.renderer_backend = Settings::RendererBackend::OpenGL; + Settings::values.renderer_backend.SetValue(Settings::RendererBackend::OpenGL); } Settings::Apply(); @@ -653,6 +689,11 @@ void GMainWindow::InitializeHotkeys() { ui.action_Capture_Screenshot->setShortcutContext( hotkey_registry.GetShortcutContext(main_window, capture_screenshot)); + ui.action_Fullscreen->setShortcut( + hotkey_registry.GetHotkey(main_window, fullscreen, this)->key()); + ui.action_Fullscreen->setShortcutContext( + hotkey_registry.GetShortcutContext(main_window, fullscreen)); + connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this), &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile); connect( @@ -686,24 +727,24 @@ void GMainWindow::InitializeHotkeys() { }); connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Speed Limit"), this), &QShortcut::activated, this, [&] { - Settings::values.use_frame_limit = !Settings::values.use_frame_limit; + Settings::values.use_frame_limit.SetValue( + !Settings::values.use_frame_limit.GetValue()); UpdateStatusBar(); }); - // TODO: Remove this comment/static whenever the next major release of - // MSVC occurs and we make it a requirement (see: - // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html) - static constexpr u16 SPEED_LIMIT_STEP = 5; + constexpr u16 SPEED_LIMIT_STEP = 5; connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), &QShortcut::activated, this, [&] { - if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { - Settings::values.frame_limit += SPEED_LIMIT_STEP; + if (Settings::values.frame_limit.GetValue() < 9999 - SPEED_LIMIT_STEP) { + Settings::values.frame_limit.SetValue(SPEED_LIMIT_STEP + + Settings::values.frame_limit.GetValue()); UpdateStatusBar(); } }); connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Decrease Speed Limit"), this), &QShortcut::activated, this, [&] { - if (Settings::values.frame_limit > SPEED_LIMIT_STEP) { - Settings::values.frame_limit -= SPEED_LIMIT_STEP; + if (Settings::values.frame_limit.GetValue() > SPEED_LIMIT_STEP) { + Settings::values.frame_limit.SetValue(Settings::values.frame_limit.GetValue() - + SPEED_LIMIT_STEP); UpdateStatusBar(); } }); @@ -715,7 +756,7 @@ void GMainWindow::InitializeHotkeys() { }); connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Capture Screenshot"), this), &QShortcut::activated, this, [&] { - if (emu_thread->IsRunning()) { + if (emu_thread != nullptr && emu_thread->IsRunning()) { OnCaptureScreenshot(); } }); @@ -726,6 +767,9 @@ void GMainWindow::InitializeHotkeys() { Settings::values.use_docked_mode); dock_status_button->setChecked(Settings::values.use_docked_mode); }); + connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this), + &QShortcut::activated, this, + [] { Settings::values.audio_muted = !Settings::values.audio_muted; }); } void GMainWindow::SetDefaultUIGeometry() { @@ -826,6 +870,10 @@ void GMainWindow::ConnectMenuEvents() { connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame); connect(ui.action_Report_Compatibility, &QAction::triggered, this, &GMainWindow::OnMenuReportCompatibility); + connect(ui.action_Open_Mods_Page, &QAction::triggered, this, &GMainWindow::OnOpenModsPage); + connect(ui.action_Open_Quickstart_Guide, &QAction::triggered, this, + &GMainWindow::OnOpenQuickstartGuide); + connect(ui.action_Open_FAQ, &QAction::triggered, this, &GMainWindow::OnOpenFAQ); connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); }); connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure); @@ -839,10 +887,6 @@ void GMainWindow::ConnectMenuEvents() { connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize); // Fullscreen - ui.action_Fullscreen->setShortcut( - hotkey_registry - .GetHotkey(QStringLiteral("Main Window"), QStringLiteral("Fullscreen"), this) - ->key()); connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen); // Movie @@ -910,6 +954,8 @@ bool GMainWindow::LoadROM(const QString& filename) { nullptr, // E-Commerce }); + system.RegisterHostThread(); + const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())}; const auto drd_callout = @@ -996,6 +1042,17 @@ void GMainWindow::BootGame(const QString& filename) { LOG_INFO(Frontend, "yuzu starting..."); StoreRecentFile(filename); // Put the filename on top of the list + u64 title_id{0}; + + const auto v_file = Core::GetGameFileFromPath(vfs, filename.toUtf8().constData()); + const auto loader = Loader::GetLoader(v_file); + if (!(loader == nullptr || loader->ReadProgramId(title_id) != Loader::ResultStatus::Success)) { + // Load per game settings + Config per_game_config(fmt::format("{:016X}.ini", title_id), false); + } + + Settings::LogSettings(); + if (UISettings::values.select_user_on_boot) { SelectAndSetCurrentUser(); } @@ -1020,12 +1077,14 @@ void GMainWindow::BootGame(const QString& filename) { &LoadingScreen::OnLoadProgress, Qt::QueuedConnection); // Update the GUI + UpdateStatusButtons(); if (ui.action_Single_Window_Mode->isChecked()) { game_list->hide(); game_list_placeholder->hide(); } status_bar_update_timer.start(2000); async_status_button->setDisabled(true); + multicore_status_button->setDisabled(true); renderer_status_button->setDisabled(true); if (UISettings::values.hide_mouse) { @@ -1034,20 +1093,20 @@ void GMainWindow::BootGame(const QString& filename) { ui.centralwidget->setMouseTracking(true); } - const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); - std::string title_name; + std::string title_version; const auto res = Core::System::GetInstance().GetGameName(title_name); - if (res != Loader::ResultStatus::Success) { - const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata(); - if (metadata.first != nullptr) - title_name = metadata.first->GetApplicationName(); - if (title_name.empty()) - title_name = FileUtil::GetFilename(filename.toStdString()); + const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata(); + if (metadata.first != nullptr) { + title_version = metadata.first->GetVersionString(); + title_name = metadata.first->GetApplicationName(); + } + if (res != Loader::ResultStatus::Success || title_name.empty()) { + title_name = FileUtil::GetFilename(filename.toStdString()); } - LOG_INFO(Frontend, "Booting game: {:016X} | {}", title_id, title_name); - UpdateWindowTitle(QString::fromStdString(title_name)); + LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version); + UpdateWindowTitle(title_name, title_version); loading_screen->Prepare(Core::System::GetInstance().GetAppLoader()); loading_screen->show(); @@ -1113,6 +1172,7 @@ void GMainWindow::ShutdownGame() { game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); async_status_button->setEnabled(true); + multicore_status_button->setEnabled(true); #ifdef HAS_VULKAN renderer_status_button->setEnabled(true); #endif @@ -1474,7 +1534,7 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { return; } - ConfigurePerGameGeneral dialog(this, title_id); + ConfigurePerGame dialog(this, title_id); dialog.LoadFromFile(v_file); auto result = dialog.exec(); if (result == QDialog::Accepted) { @@ -1485,7 +1545,14 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { game_list->PopulateAsync(UISettings::values.game_dirs); } - config->Save(); + // Do not cause the global config to write local settings into the config file + Settings::RestoreGlobalState(); + + if (!Core::System::GetInstance().IsPoweredOn()) { + config->Save(); + } + } else { + Settings::RestoreGlobalState(); } } @@ -1772,6 +1839,9 @@ void GMainWindow::OnStopGame() { } ShutdownGame(); + + Settings::RestoreGlobalState(); + UpdateStatusButtons(); } void GMainWindow::OnLoadComplete() { @@ -1797,6 +1867,26 @@ void GMainWindow::OnMenuReportCompatibility() { } } +void GMainWindow::OpenURL(const QUrl& url) { + const bool open = QDesktopServices::openUrl(url); + if (!open) { + QMessageBox::warning(this, tr("Error opening URL"), + tr("Unable to open the URL \"%1\".").arg(url.toString())); + } +} + +void GMainWindow::OnOpenModsPage() { + OpenURL(QUrl(QStringLiteral("https://github.com/yuzu-emu/yuzu/wiki/Switch-Mods"))); +} + +void GMainWindow::OnOpenQuickstartGuide() { + OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/help/quickstart/"))); +} + +void GMainWindow::OnOpenFAQ() { + OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/wiki/faq/"))); +} + void GMainWindow::ToggleFullscreen() { if (!emulation_running) { return; @@ -1859,7 +1949,7 @@ void GMainWindow::ToggleWindowMode() { void GMainWindow::ResetWindowSize() { const auto aspect_ratio = Layout::EmulationAspectRatio( - static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio), + static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio.GetValue()), static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width); if (!ui.action_Single_Window_Mode->isChecked()) { render_window->resize(Layout::ScreenUndocked::Height / aspect_ratio, @@ -1907,12 +1997,7 @@ void GMainWindow::OnConfigure() { ui.centralwidget->setMouseTracking(false); } - dock_status_button->setChecked(Settings::values.use_docked_mode); - async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); -#ifdef HAS_VULKAN - renderer_status_button->setChecked(Settings::values.renderer_backend == - Settings::RendererBackend::Vulkan); -#endif + UpdateStatusButtons(); } void GMainWindow::OnLoadAmiibo() { @@ -1995,7 +2080,8 @@ void GMainWindow::OnCaptureScreenshot() { OnStartGame(); } -void GMainWindow::UpdateWindowTitle(const QString& title_name) { +void GMainWindow::UpdateWindowTitle(const std::string& title_name, + const std::string& title_version) { const auto full_name = std::string(Common::g_build_fullname); const auto branch_name = std::string(Common::g_scm_branch); const auto description = std::string(Common::g_scm_desc); @@ -2004,7 +2090,7 @@ void GMainWindow::UpdateWindowTitle(const QString& title_name) { const auto date = QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd")).toStdString(); - if (title_name.isEmpty()) { + if (title_name.empty()) { const auto fmt = std::string(Common::g_title_bar_format_idle); setWindowTitle(QString::fromStdString(fmt::format(fmt.empty() ? "yuzu {0}| {1}-{2}" : fmt, full_name, branch_name, description, @@ -2012,8 +2098,8 @@ void GMainWindow::UpdateWindowTitle(const QString& title_name) { } else { const auto fmt = std::string(Common::g_title_bar_format_running); setWindowTitle(QString::fromStdString( - fmt::format(fmt.empty() ? "yuzu {0}| {3} | {1}-{2}" : fmt, full_name, branch_name, - description, title_name.toStdString(), date, build_id))); + fmt::format(fmt.empty() ? "yuzu {0}| {3} | {6} | {1}-{2}" : fmt, full_name, branch_name, + description, title_name, date, build_id, title_version))); } } @@ -2025,21 +2111,34 @@ void GMainWindow::UpdateStatusBar() { auto results = Core::System::GetInstance().GetAndResetPerfStats(); - if (Settings::values.use_frame_limit) { + if (Settings::values.use_frame_limit.GetValue()) { emu_speed_label->setText(tr("Speed: %1% / %2%") .arg(results.emulation_speed * 100.0, 0, 'f', 0) - .arg(Settings::values.frame_limit)); + .arg(Settings::values.frame_limit.GetValue())); } else { emu_speed_label->setText(tr("Speed: %1%").arg(results.emulation_speed * 100.0, 0, 'f', 0)); } game_fps_label->setText(tr("Game: %1 FPS").arg(results.game_fps, 0, 'f', 0)); emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2)); - emu_speed_label->setVisible(true); + emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue()); game_fps_label->setVisible(true); emu_frametime_label->setVisible(true); } +void GMainWindow::UpdateStatusButtons() { + dock_status_button->setChecked(Settings::values.use_docked_mode); + multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); + Settings::values.use_asynchronous_gpu_emulation.SetValue( + Settings::values.use_asynchronous_gpu_emulation.GetValue() || + Settings::values.use_multi_core.GetValue()); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); +#ifdef HAS_VULKAN + renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == + Settings::RendererBackend::Vulkan); +#endif +} + void GMainWindow::HideMouseCursor() { if (emu_thread == nullptr || UISettings::values.hide_mouse == false) { mouse_hide_timer.stop(); @@ -2123,6 +2222,9 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det if (answer == QMessageBox::Yes) { if (emu_thread) { ShutdownGame(); + + Settings::RestoreGlobalState(); + UpdateStatusButtons(); } } else { // Only show the message if the game is still running. @@ -2154,7 +2256,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { "title.keys_autogenerated"); } - Core::Crypto::KeyManager keys{}; + Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); if (keys.BaseDeriveNecessary()) { Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory( FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir), FileSys::Mode::Read)}; @@ -2285,9 +2387,13 @@ void GMainWindow::closeEvent(QCloseEvent* event) { hotkey_registry.SaveHotkeys(); // Shutdown session if the emu thread is active... - if (emu_thread != nullptr) + if (emu_thread != nullptr) { ShutdownGame(); + Settings::RestoreGlobalState(); + UpdateStatusButtons(); + } + render_window->close(); QWidget::closeEvent(event); @@ -2467,8 +2573,6 @@ int main(int argc, char* argv[]) { QObject::connect(&app, &QGuiApplication::applicationStateChanged, &main_window, &GMainWindow::OnAppFocusStateChanged); - Settings::LogSettings(); - int result = app.exec(); detached_tasks.WaitForAllTasks(); return result; diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 4f4c8ddbe..8e3d39c38 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -181,6 +181,9 @@ private slots: void OnPauseGame(); void OnStopGame(); void OnMenuReportCompatibility(); + void OnOpenModsPage(); + void OnOpenQuickstartGuide(); + void OnOpenFAQ(); /// Called whenever a user selects a game in the game list widget. void OnGameListLoadFile(QString game_path); void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path); @@ -215,10 +218,13 @@ private slots: private: std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); - void UpdateWindowTitle(const QString& title_name = {}); + void UpdateWindowTitle(const std::string& title_name = {}, + const std::string& title_version = {}); void UpdateStatusBar(); + void UpdateStatusButtons(); void HideMouseCursor(); void ShowMouseCursor(); + void OpenURL(const QUrl& url); Ui::MainWindow ui; @@ -234,6 +240,7 @@ private: QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; QPushButton* async_status_button = nullptr; + QPushButton* multicore_status_button = nullptr; QPushButton* renderer_status_button = nullptr; QPushButton* dock_status_button = nullptr; QTimer status_bar_update_timer; diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 97c90f50b..bee6e107e 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -113,6 +113,9 @@ <string>&Help</string> </property> <addaction name="action_Report_Compatibility"/> + <addaction name="action_Open_Mods_Page"/> + <addaction name="action_Open_Quickstart_Guide"/> + <addaction name="action_Open_FAQ"/> <addaction name="separator"/> <addaction name="action_About"/> </widget> @@ -256,6 +259,21 @@ <bool>false</bool> </property> </action> + <action name="action_Open_Mods_Page"> + <property name="text"> + <string>Open Mods Page</string> + </property> + </action> + <action name="action_Open_Quickstart_Guide"> + <property name="text"> + <string>Open Quickstart Guide</string> + </property> + </action> + <action name="action_Open_FAQ"> + <property name="text"> + <string>FAQ</string> + </property> + </action> <action name="action_Open_yuzu_Folder"> <property name="text"> <string>Open yuzu Folder</string> diff --git a/src/yuzu/yuzu.rc b/src/yuzu/yuzu.rc index 1b253653f..4a3645a71 100644 --- a/src/yuzu/yuzu.rc +++ b/src/yuzu/yuzu.rc @@ -16,4 +16,4 @@ IDI_ICON1 ICON "../../dist/yuzu.ico" // RT_MANIFEST // -1 RT_MANIFEST "../../dist/yuzu.manifest" +0 RT_MANIFEST "../../dist/yuzu.manifest" diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index c20d48c42..23763144f 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -354,65 +354,72 @@ void Config::ReadValues() { const auto rng_seed_enabled = sdl2_config->GetBoolean("System", "rng_seed_enabled", false); if (rng_seed_enabled) { - Settings::values.rng_seed = sdl2_config->GetInteger("System", "rng_seed", 0); + Settings::values.rng_seed.SetValue(sdl2_config->GetInteger("System", "rng_seed", 0)); } else { - Settings::values.rng_seed = std::nullopt; + Settings::values.rng_seed.SetValue(std::nullopt); } const auto custom_rtc_enabled = sdl2_config->GetBoolean("System", "custom_rtc_enabled", false); if (custom_rtc_enabled) { - Settings::values.custom_rtc = - std::chrono::seconds(sdl2_config->GetInteger("System", "custom_rtc", 0)); + Settings::values.custom_rtc.SetValue( + std::chrono::seconds(sdl2_config->GetInteger("System", "custom_rtc", 0))); } else { - Settings::values.custom_rtc = std::nullopt; + Settings::values.custom_rtc.SetValue(std::nullopt); } - Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); - Settings::values.time_zone_index = sdl2_config->GetInteger("System", "time_zone_index", 0); + Settings::values.language_index.SetValue( + sdl2_config->GetInteger("System", "language_index", 1)); + Settings::values.time_zone_index.SetValue( + sdl2_config->GetInteger("System", "time_zone_index", 0)); // Core - Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); + Settings::values.use_multi_core.SetValue( + sdl2_config->GetBoolean("Core", "use_multi_core", false)); // Renderer const int renderer_backend = sdl2_config->GetInteger( "Renderer", "backend", static_cast<int>(Settings::RendererBackend::OpenGL)); - Settings::values.renderer_backend = static_cast<Settings::RendererBackend>(renderer_backend); + Settings::values.renderer_backend.SetValue( + static_cast<Settings::RendererBackend>(renderer_backend)); Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); - Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); - - Settings::values.resolution_factor = - static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); - Settings::values.aspect_ratio = - static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); - Settings::values.max_anisotropy = - static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0)); - Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); - Settings::values.frame_limit = - static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); - Settings::values.use_disk_shader_cache = - sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); + Settings::values.vulkan_device.SetValue( + sdl2_config->GetInteger("Renderer", "vulkan_device", 0)); + + Settings::values.aspect_ratio.SetValue( + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0))); + Settings::values.max_anisotropy.SetValue( + static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0))); + Settings::values.use_frame_limit.SetValue( + sdl2_config->GetBoolean("Renderer", "use_frame_limit", true)); + Settings::values.frame_limit.SetValue( + static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100))); + Settings::values.use_disk_shader_cache.SetValue( + sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); - Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); - Settings::values.use_asynchronous_gpu_emulation = - sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); - Settings::values.use_vsync = - static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); - Settings::values.use_assembly_shaders = - sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false); - Settings::values.use_fast_gpu_time = - sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); - - Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); - Settings::values.bg_green = - static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0)); - Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0)); + Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); + Settings::values.use_asynchronous_gpu_emulation.SetValue( + sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false)); + Settings::values.use_vsync.SetValue( + static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1))); + Settings::values.use_assembly_shaders.SetValue( + sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false)); + Settings::values.use_fast_gpu_time.SetValue( + sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); + + Settings::values.bg_red.SetValue( + static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0))); + Settings::values.bg_green.SetValue( + static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0))); + Settings::values.bg_blue.SetValue( + static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0))); // Audio Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); - Settings::values.enable_audio_stretching = - sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); + Settings::values.enable_audio_stretching.SetValue( + sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true)); Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); - Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1)); + Settings::values.volume.SetValue( + static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1))); // Miscellaneous Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace"); @@ -432,6 +439,8 @@ void Config::ReadValues() { Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); Settings::values.disable_cpu_opt = sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false); + Settings::values.disable_macro_jit = + sdl2_config->GetBoolean("Debugging", "disable_macro_jit", false); const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); std::stringstream ss(title_list); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index abc6e6e65..45c07ed5d 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -117,11 +117,6 @@ use_hw_renderer = # 0: Interpreter (slow), 1 (default): JIT (fast) use_shader_jit = -# Resolution scale factor -# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale -# factor for the Switch resolution -resolution_factor = - # Aspect ratio # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window aspect_ratio = @@ -291,6 +286,8 @@ quest_flag = # Determines whether or not JIT CPU optimizations are enabled # false: Optimizations Enabled, true: Optimizations Disabled disable_cpu_opt = +# Enables/Disables the macro JIT compiler +disable_macro_jit=false [WebService] # Whether or not to enable telemetry diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 411e7e647..e78025737 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); + if (Settings::values.renderer_debug) { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); + } SDL_GL_SetSwapInterval(0); std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, @@ -162,7 +165,7 @@ std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateShared void EmuWindow_SDL2_GL::Present() { SDL_GL_MakeCurrent(render_window, window_context); - SDL_GL_SetSwapInterval(Settings::values.use_vsync ? 1 : 0); + SDL_GL_SetSwapInterval(Settings::values.use_vsync.GetValue() ? 1 : 0); while (IsOpen()) { system.Renderer().TryPresent(100); SDL_GL_SwapWindow(render_window); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 4d2ea7e9e..512b060a7 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> #include <iostream> #include <memory> #include <string> @@ -180,7 +181,7 @@ int main(int argc, char** argv) { Core::System& system{Core::System::GetInstance()}; std::unique_ptr<EmuWindow_SDL2> emu_window; - switch (Settings::values.renderer_backend) { + switch (Settings::values.renderer_backend.GetValue()) { case Settings::RendererBackend::OpenGL: emu_window = std::make_unique<EmuWindow_SDL2_GL>(system, fullscreen); break; @@ -236,9 +237,11 @@ int main(int argc, char** argv) { system.Renderer().Rasterizer().LoadDiskResources(); std::thread render_thread([&emu_window] { emu_window->Present(); }); + system.Run(); while (emu_window->IsOpen()) { - system.RunLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + system.Pause(); render_thread.join(); system.Shutdown(); diff --git a/src/yuzu_cmd/yuzu.rc b/src/yuzu_cmd/yuzu.rc index 7de8ef3d9..0cde75e2f 100644 --- a/src/yuzu_cmd/yuzu.rc +++ b/src/yuzu_cmd/yuzu.rc @@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico" // RT_MANIFEST // -1 RT_MANIFEST "../../dist/yuzu.manifest" +0 RT_MANIFEST "../../dist/yuzu.manifest" diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 3be58b15d..acb22885e 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp @@ -81,6 +81,9 @@ void Config::ReadValues() { Settings::values.touchscreen.diameter_x = 15; Settings::values.touchscreen.diameter_y = 15; + Settings::values.use_docked_mode = + sdl2_config->GetBoolean("Controls", "use_docked_mode", false); + // Data Storage Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true); @@ -92,59 +95,59 @@ void Config::ReadValues() { FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir))); // System - Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false); - Settings::values.current_user = std::clamp<int>( sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1); const auto rng_seed_enabled = sdl2_config->GetBoolean("System", "rng_seed_enabled", false); if (rng_seed_enabled) { - Settings::values.rng_seed = sdl2_config->GetInteger("System", "rng_seed", 0); + Settings::values.rng_seed.SetValue(sdl2_config->GetInteger("System", "rng_seed", 0)); } else { - Settings::values.rng_seed = std::nullopt; + Settings::values.rng_seed.SetValue(std::nullopt); } const auto custom_rtc_enabled = sdl2_config->GetBoolean("System", "custom_rtc_enabled", false); if (custom_rtc_enabled) { - Settings::values.custom_rtc = - std::chrono::seconds(sdl2_config->GetInteger("System", "custom_rtc", 0)); + Settings::values.custom_rtc.SetValue( + std::chrono::seconds(sdl2_config->GetInteger("System", "custom_rtc", 0))); } else { - Settings::values.custom_rtc = std::nullopt; + Settings::values.custom_rtc.SetValue(std::nullopt); } // Core - Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); + Settings::values.use_multi_core.SetValue( + sdl2_config->GetBoolean("Core", "use_multi_core", false)); // Renderer - Settings::values.resolution_factor = - static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); - Settings::values.aspect_ratio = - static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); - Settings::values.max_anisotropy = - static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0)); - Settings::values.use_frame_limit = false; - Settings::values.frame_limit = 100; - Settings::values.use_disk_shader_cache = - sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); + Settings::values.aspect_ratio.SetValue( + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0))); + Settings::values.max_anisotropy.SetValue( + static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0))); + Settings::values.use_frame_limit.SetValue(false); + Settings::values.frame_limit.SetValue(100); + Settings::values.use_disk_shader_cache.SetValue( + sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); - Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); - Settings::values.use_asynchronous_gpu_emulation = - sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); - Settings::values.use_fast_gpu_time = - sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); - - Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); - Settings::values.bg_green = - static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0)); - Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0)); + Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); + Settings::values.use_asynchronous_gpu_emulation.SetValue( + sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false)); + Settings::values.use_fast_gpu_time.SetValue( + sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); + + Settings::values.bg_red.SetValue( + static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0))); + Settings::values.bg_green.SetValue( + static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0))); + Settings::values.bg_blue.SetValue( + static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0))); // Audio Settings::values.sink_id = "null"; - Settings::values.enable_audio_stretching = false; + Settings::values.enable_audio_stretching.SetValue(false); Settings::values.audio_device_id = "auto"; - Settings::values.volume = 0; + Settings::values.volume.SetValue(0); - Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); + Settings::values.language_index.SetValue( + sdl2_config->GetInteger("System", "language_index", 1)); // Miscellaneous Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace"); diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index ca203b64d..41bbbbf60 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h @@ -21,11 +21,6 @@ use_hw_renderer = # 0: Interpreter (slow), 1 (default): JIT (fast) use_shader_jit = -# Resolution scale factor -# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale -# factor for the Switch resolution -resolution_factor = - # Aspect ratio # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window aspect_ratio = diff --git a/src/yuzu_tester/service/yuzutest.cpp b/src/yuzu_tester/service/yuzutest.cpp index 85d3f436b..2d3f6e3a7 100644 --- a/src/yuzu_tester/service/yuzutest.cpp +++ b/src/yuzu_tester/service/yuzutest.cpp @@ -53,7 +53,7 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u32>(write_size); + rb.Push<u32>(static_cast<u32>(write_size)); } void StartIndividual(Kernel::HLERequestContext& ctx) { diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index 676e70ebd..083667baf 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> #include <iostream> #include <memory> #include <string> @@ -255,9 +256,11 @@ int main(int argc, char** argv) { system.GPU().Start(); system.Renderer().Rasterizer().LoadDiskResources(); + system.Run(); while (!finished) { - system.RunLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + system.Pause(); detached_tasks.WaitForAllTasks(); return return_value; diff --git a/src/yuzu_tester/yuzu.rc b/src/yuzu_tester/yuzu.rc index 7de8ef3d9..0cde75e2f 100644 --- a/src/yuzu_tester/yuzu.rc +++ b/src/yuzu_tester/yuzu.rc @@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico" // RT_MANIFEST // -1 RT_MANIFEST "../../dist/yuzu.manifest" +0 RT_MANIFEST "../../dist/yuzu.manifest" |